mutisya commited on
Commit
52ed889
·
verified ·
1 Parent(s): d6e8bff

Remove the app folder — the code will be downloaded from the private code space during the build

Browse files
app/__init__.py DELETED
@@ -1 +0,0 @@
1
- # Backend application package
 
 
app/auth.py DELETED
@@ -1,310 +0,0 @@
1
- """
2
- Authentication module for HuggingFace token validation
3
- """
4
- import os
5
- from typing import Optional
6
- from fastapi import HTTPException, status, Request
7
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
8
- from fastapi.security.utils import get_authorization_scheme_param
9
-
10
-
11
def is_local_development() -> bool:
    """Return True when the application appears to be running in local development.

    Authentication is disabled whenever this returns True, so every heuristic
    here must be something that cannot plausibly hold in production.

    Checks, in order:
      1. Explicit DISABLE_AUTH flag.
      2. ENVIRONMENT set to a development value.
      3. DEBUG flag.
      4. Serving on an explicit loopback host with the default dev port.
      5. Presence of local-development marker files in the working directory.
      6. Running inside Docker with ALLOW_ALL_ORIGINS enabled.
    """
    truthy = ('true', '1', 'yes')

    # Method 1: explicit opt-out of authentication.
    if os.getenv('DISABLE_AUTH', '').lower() in truthy:
        return True

    # Method 2: conventional environment name.
    if os.getenv('ENVIRONMENT', '').lower() in ('development', 'dev', 'local'):
        return True

    # Method 3: debug mode implies local development.
    if os.getenv('DEBUG', '').lower() in truthy:
        return True

    # Method 4: explicit loopback host on the default dev port.
    # BUGFIX: '0.0.0.0' is intentionally NOT treated as local anymore —
    # binding all interfaces on port 7860 is exactly how production
    # containers (e.g. HuggingFace Spaces) run, and treating it as local
    # would silently disable authentication in production.
    if os.getenv('HOST', '') in ('localhost', '127.0.0.1') and os.getenv('PORT', '') == '7860':
        return True

    # Method 5: local development marker files in the working directory.
    for indicator in ('.env.local', 'docker-compose.local.yml', 'Dockerfile.local'):
        if os.path.exists(indicator):
            return True

    # Method 6: Docker container explicitly configured for permissive local CORS.
    if os.path.exists('/.dockerenv') and os.getenv('ALLOW_ALL_ORIGINS', '').lower() == 'true':
        return True

    return False
54
-
55
-
56
- class HuggingFaceTokenAuth:
57
- """HuggingFace token authentication handler"""
58
-
59
- def __init__(self):
60
- self.bearer = HTTPBearer(auto_error=False)
61
- self.is_local = is_local_development()
62
-
63
- if self.is_local:
64
- print("🔓 RUNNING IN LOCAL DEVELOPMENT MODE - AUTH DISABLED")
65
- print(" Environment indicators:")
66
- print(f" - DISABLE_AUTH: {os.getenv('DISABLE_AUTH', 'not set')}")
67
- print(f" - ENVIRONMENT: {os.getenv('ENVIRONMENT', 'not set')}")
68
- print(f" - DEBUG: {os.getenv('DEBUG', 'not set')}")
69
- print(f" - HOST: {os.getenv('HOST', 'not set')}")
70
- print(f" - PORT: {os.getenv('PORT', 'not set')}")
71
- print(f" - ALLOW_ALL_ORIGINS: {os.getenv('ALLOW_ALL_ORIGINS', 'not set')}")
72
- print(f" - Docker container: {os.path.exists('/.dockerenv')}")
73
- print(f" - .env.local exists: {os.path.exists('.env.local')}")
74
- else:
75
- print("🔒 RUNNING IN PRODUCTION MODE - AUTH REQUIRED")
76
-
77
- def verify_token(self, token: str) -> bool:
78
- """
79
- Verify if the token is a valid HuggingFace token.
80
- In local development mode, always returns True.
81
- """
82
- # Skip token validation in local development
83
- if self.is_local:
84
- print("🔓 Local development mode: skipping token validation")
85
- return True
86
-
87
- try:
88
- if not token:
89
- return False
90
-
91
- if not isinstance(token, str):
92
- print(f"❌ Token is not a string: {type(token)}")
93
- return False
94
-
95
- # HuggingFace tokens start with 'hf_'
96
- if not token.startswith('hf_'):
97
- print(f"❌ Token does not start with 'hf_': {token[:10]}...")
98
- return False
99
-
100
- # Additional validation can be added here
101
- # For example, you could make a request to HuggingFace API
102
- # to validate the token, but that would add latency
103
-
104
- return True
105
-
106
- except Exception as e:
107
- print(f"❌ Error in verify_token: {e}")
108
- return False
109
-
110
- def get_token_from_request(self, request: Request) -> Optional[str]:
111
- """Extract token from various sources in the request"""
112
-
113
- # Method 1: Authorization header
114
- authorization = request.headers.get("Authorization")
115
- if authorization:
116
- scheme, token = get_authorization_scheme_param(authorization)
117
- if scheme.lower() == "bearer":
118
- return token
119
-
120
- # Method 2: Query parameter (for WebSocket initial handshake)
121
- token = request.query_params.get("token")
122
- if token:
123
- return token
124
-
125
- # Method 3: Custom header (alternative)
126
- token = request.headers.get("X-HF-Token")
127
- if token:
128
- return token
129
-
130
- return None
131
-
132
- async def authenticate_request(self, request: Request) -> bool:
133
- """Authenticate a request using HuggingFace token"""
134
- token = self.get_token_from_request(request)
135
-
136
- if not token:
137
- return False
138
-
139
- return self.verify_token(token)
140
-
141
-
142
# Global instance
# Module-level singleton shared by the FastAPI dependencies and WebSocket
# helpers below; constructed (and its mode banner printed) at import time.
hf_auth = HuggingFaceTokenAuth()
144
-
145
-
146
async def require_hf_token(request: Request) -> str:
    """FastAPI dependency enforcing a valid HuggingFace token.

    Returns the token on success. In local development mode validation is
    skipped entirely and a placeholder token is returned. Raises HTTP 401
    when the token is missing or malformed.
    """
    if hf_auth.is_local:
        print("🔓 Local development mode: bypassing HF token requirement")
        return "local-development-bypass"

    token = hf_auth.get_token_from_request(request)

    # Distinguish "no token at all" from "token present but invalid" so the
    # client receives an actionable error message.
    if not token:
        detail = "HuggingFace token required. Please provide a valid token in Authorization header."
    elif not hf_auth.verify_token(token):
        detail = "Invalid HuggingFace token. Token must start with 'hf_'."
    else:
        return token

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail=detail,
        headers={"WWW-Authenticate": "Bearer"},
    )
174
-
175
-
176
async def optional_hf_token(request: Request) -> Optional[str]:
    """FastAPI dependency with best-effort token validation.

    Returns the token when one is present and valid, otherwise None. In
    local development mode a placeholder token is substituted whenever the
    request carries no usable token, so downstream code always sees a value.
    Useful for endpoints that work with or without authentication.
    """
    token = hf_auth.get_token_from_request(request)
    valid = bool(token) and hf_auth.verify_token(token)

    if hf_auth.is_local:
        if valid:
            return token
        print("🔓 Local development mode: providing dummy token for optional auth")
        return "local-development-bypass"

    return token if valid else None
201
-
202
-
203
def authenticate_websocket_connect(environ: dict) -> bool:
    """Authenticate a Socket.IO connection from its WSGI-style environ.

    Token sources are tried in order: ?token= query parameter, the
    Authorization bearer header, then the X-HF-Token header. Always True in
    local development mode. Never raises; every failure path returns False.
    Called during the Socket.IO connect event.
    """
    if hf_auth.is_local:
        print("🔓 Local development mode: bypassing WebSocket authentication")
        return True

    try:
        print("=== WEBSOCKET ENVIRON AUTHENTICATION ===")
        print(f"Environ type: {type(environ)}")

        if not isinstance(environ, dict):
            print(f"❌ Environ is not a dict: {type(environ)}")
            return False

        # Source 1: ?token= query parameter.
        query_string = environ.get('QUERY_STRING', '')
        print(f"Query string: {query_string}")
        if query_string:
            from urllib.parse import parse_qs
            query_params = parse_qs(query_string)
            print(f"Parsed query params: {query_params}")
            tokens = query_params.get('token', [])
            if tokens:
                token = tokens[0]
                print(f"Found token in query: {token[:10]}...")
                if hf_auth.verify_token(token):
                    print("✓ Token validated via query params")
                    return True

        # Source 2: "Authorization: Bearer <token>" header.
        auth_header = environ.get('HTTP_AUTHORIZATION', '')
        print(f"Authorization header: {auth_header[:20] if auth_header else 'None'}...")
        if auth_header.startswith('Bearer '):
            token = auth_header[len('Bearer '):]
            print(f"Found token in Authorization header: {token[:10]}...")
            if hf_auth.verify_token(token):
                print("✓ Token validated via Authorization header")
                return True

        # Source 3: custom X-HF-Token header.
        hf_token_header = environ.get('HTTP_X_HF_TOKEN', '')
        print(f"X-HF-Token header: {hf_token_header[:10] if hf_token_header else 'None'}...")
        if hf_token_header and hf_auth.verify_token(hf_token_header):
            print("✓ Token validated via X-HF-Token header")
            return True

        print("❌ No valid token found in environ")
        print(f"Available environ keys: {list(environ.keys())}")
        return False

    except Exception as e:
        print(f"❌ Error in authenticate_websocket_connect: {e}")
        import traceback
        traceback.print_exc()
        return False
265
-
266
-
267
def authenticate_websocket_auth_data(auth_data: dict) -> bool:
    """Authenticate a Socket.IO connection from client-supplied auth data.

    Expects ``auth_data['token']`` to hold a HuggingFace token. Always True
    in local development mode. Never raises; every failure path returns
    False. Called when the client sends auth data in the connection.
    """
    if hf_auth.is_local:
        print("🔓 Local development mode: bypassing WebSocket auth data validation")
        return True

    try:
        print("=== WEBSOCKET AUTH DATA AUTHENTICATION ===")
        # SECURITY FIX: do not log the raw auth payload — it contains the
        # full token. Every other log site truncates tokens to 10 chars,
        # so redact the token here to match.
        if isinstance(auth_data, dict):
            redacted = {
                k: (f"{v[:10]}..." if k == 'token' and isinstance(v, str) else v)
                for k, v in auth_data.items()
            }
            print(f"Auth data received: {redacted}")
        print(f"Auth data type: {type(auth_data)}")

        if not auth_data:
            print("❌ No auth data provided")
            return False

        if not isinstance(auth_data, dict):
            print(f"❌ Auth data is not a dict: {type(auth_data)}")
            return False

        token = auth_data.get('token')
        if token:
            print(f"Found token in auth data: {token[:10]}...")
            if hf_auth.verify_token(token):
                print("✓ Token validated via auth data")
                return True
            print("❌ Invalid token in auth data")
        else:
            print("❌ No token in auth data")
            print(f"Available keys in auth data: {list(auth_data.keys())}")

        return False

    except Exception as e:
        print(f"❌ Error in authenticate_websocket_auth_data: {e}")
        import traceback
        traceback.print_exc()
        return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/config/__init__.py DELETED
@@ -1,7 +0,0 @@
1
- """
2
- Configuration package for Polyglot backend
3
- """
4
-
5
- from .cors import cors_config
6
-
7
- __all__ = ["cors_config"]
 
 
 
 
 
 
 
 
app/config/cors.py DELETED
@@ -1,295 +0,0 @@
1
- """
2
- CORS Configuration Module
3
-
4
- Centralized CORS configuration supporting multiple deployment environments.
5
- """
6
-
7
- import os
8
- import re
9
- from typing import List, Optional
10
- from enum import Enum
11
-
12
-
13
class Environment(str, Enum):
    """Deployment environment types"""
    LOCAL = "local"
    DEVELOPMENT = "development"
    STAGING = "staging"
    PRODUCTION = "production"


class CORSConfig:
    """CORS configuration manager.

    Reads ENVIRONMENT, CORS_ALLOW_ALL, CORS_ALLOWED_ORIGINS and
    CORS_ALLOWED_PATTERNS once at construction time and exposes the
    resulting policy to both FastAPI's CORSMiddleware and Socket.IO.
    """

    # Default origins for local development
    DEFAULT_LOCAL_ORIGINS = [
        "http://localhost:3000",   # React/Next.js dev server
        "http://localhost:3001",   # Polyglot frontend (Vite)
        "http://localhost:3002",   # Lessons UI (Vite)
        "http://localhost:3003",   # Podium (Vite)
        "http://localhost:3004",   # Podium alternative port
        "http://localhost:5173",   # Vite dev server
        "http://localhost:7860",   # Backend self-reference
        "http://localhost:8080",   # Alternative dev server
        "http://127.0.0.1:3000",   # IPv4 localhost variant
        "http://127.0.0.1:3001",   # IPv4 localhost variant
        "http://127.0.0.1:3002",   # IPv4 localhost variant
        "http://127.0.0.1:3003",   # IPv4 localhost variant
        "http://127.0.0.1:3004",   # IPv4 localhost variant
        "http://127.0.0.1:5173",   # IPv4 localhost variant
        "http://127.0.0.1:7860",   # IPv4 localhost variant
    ]

    # Default regex patterns for production deployments
    DEFAULT_PRODUCTION_PATTERNS = [
        r"^https://.*\.tafiti\.dev$",            # Tafiti production/staging
        r"^https://.*\.vercel\.app$",            # Vercel deployments
        r"^https://.*\.hf\.space$",              # HuggingFace Spaces
        r"^https://milimani\.tafiti-api\.org$",  # Production API
    ]

    # Mobile app protocols (WebView origins)
    MOBILE_PROTOCOLS = [
        "capacitor://localhost",  # Capacitor apps
        "ionic://localhost",      # Ionic apps
        "http://localhost",       # Mobile WebView
    ]

    def __init__(self):
        # All configuration is resolved eagerly; consumers read attributes.
        self.environment = self._get_environment()
        self.allowed_origins = self._build_allowed_origins()
        self.allow_all = self._should_allow_all()
        self.origin_patterns = self._build_origin_patterns()

    def _get_environment(self) -> Environment:
        """Parse ENVIRONMENT into an Environment, defaulting to LOCAL."""
        env_str = os.getenv("ENVIRONMENT", "local").lower()

        try:
            return Environment(env_str)
        except ValueError:
            print(f"⚠️ Unknown environment '{env_str}', defaulting to 'local'")
            return Environment.LOCAL

    def _should_allow_all(self) -> bool:
        """Check if CORS should allow all origins (insecure, dev only).

        CORS_ALLOW_ALL=true is refused outright in production.
        """
        allow_all = os.getenv("CORS_ALLOW_ALL", "false").lower()

        if allow_all == "true":
            if self.environment == Environment.PRODUCTION:
                print("❌ ERROR: CORS_ALLOW_ALL=true is not allowed in production")
                return False
            else:
                print("⚠️ WARNING: CORS allowing all origins - INSECURE, use only for development")
                return True

        return False

    def _build_allowed_origins(self) -> List[str]:
        """Build the explicit allow-list from the environment and defaults.

        Order: custom CORS_ALLOWED_ORIGINS first, then per-environment
        defaults; duplicates are removed while preserving order.
        """
        origins = []

        # Custom origins: comma-separated list in CORS_ALLOWED_ORIGINS.
        custom_origins_str = os.getenv("CORS_ALLOWED_ORIGINS", "")

        if custom_origins_str:
            custom_origins = [
                origin.strip()
                for origin in custom_origins_str.split(",")
                if origin.strip()
            ]
            origins.extend(custom_origins)
            print(f"✓ Loaded {len(custom_origins)} custom CORS origins from environment")

        # Local development gets the full localhost allow-list.
        if self.environment == Environment.LOCAL:
            origins.extend(self.DEFAULT_LOCAL_ORIGINS)
            print(f"✓ Added {len(self.DEFAULT_LOCAL_ORIGINS)} default local origins")

        # Mobile WebView origins are allowed everywhere except production.
        if self.environment != Environment.PRODUCTION:
            origins.extend(self.MOBILE_PROTOCOLS)
            print(f"✓ Added {len(self.MOBILE_PROTOCOLS)} mobile protocol origins")

        # Deduplicate while preserving order.
        seen = set()
        unique_origins = []
        for origin in origins:
            if origin not in seen:
                seen.add(origin)
                unique_origins.append(origin)

        return unique_origins

    def _build_origin_patterns(self) -> List[re.Pattern]:
        """Compile regex patterns used for origin matching.

        Invalid custom patterns are skipped with a warning rather than
        aborting startup.
        """
        patterns = []

        # Custom patterns: comma-separated regexes in CORS_ALLOWED_PATTERNS.
        custom_patterns_str = os.getenv("CORS_ALLOWED_PATTERNS", "")

        if custom_patterns_str:
            custom_pattern_strs = [
                p.strip()
                for p in custom_patterns_str.split(",")
                if p.strip()
            ]

            for pattern_str in custom_pattern_strs:
                try:
                    patterns.append(re.compile(pattern_str))
                except re.error as e:
                    print(f"⚠️ Invalid regex pattern '{pattern_str}': {e}")

            print(f"✓ Loaded {len(patterns)} custom CORS patterns from environment")

        # Default deployment patterns for any non-local environment.
        if self.environment in [Environment.PRODUCTION, Environment.STAGING, Environment.DEVELOPMENT]:
            for pattern_str in self.DEFAULT_PRODUCTION_PATTERNS:
                patterns.append(re.compile(pattern_str))

            print(f"✓ Added {len(self.DEFAULT_PRODUCTION_PATTERNS)} default production patterns")

        # Local development accepts any localhost port.
        if self.environment == Environment.LOCAL:
            patterns.append(re.compile(r"^http://localhost:\d+$"))
            patterns.append(re.compile(r"^http://127\.0\.0\.1:\d+$"))
            print("✓ Added localhost wildcard patterns for development")

        return patterns

    def is_origin_allowed(self, origin: str) -> bool:
        """
        Check if an origin is allowed based on explicit list or patterns

        Args:
            origin: Origin to check (e.g., "https://app.tafiti.dev")

        Returns:
            True if origin is allowed, False otherwise
        """
        # If allow_all is enabled (dev only)
        if self.allow_all:
            return True

        # Check explicit origins list
        if origin in self.allowed_origins:
            return True

        # Check against compiled patterns
        for pattern in self.origin_patterns:
            if pattern.match(origin):
                return True

        return False

    def get_cors_middleware_config(self) -> dict:
        """Get the keyword arguments for FastAPI's CORSMiddleware."""
        if self.allow_all:
            return {
                "allow_origins": ["*"],
                "allow_credentials": False,  # Cannot use credentials with wildcard
                "allow_methods": ["*"],
                "allow_headers": ["*"],
            }

        if self.origin_patterns:
            # CORSMiddleware accepts a single regex, so OR all patterns together.
            combined_pattern = "|".join(f"({p.pattern})" for p in self.origin_patterns)

            return {
                "allow_origins": self.allowed_origins,
                "allow_origin_regex": combined_pattern,
                "allow_credentials": True,
                "allow_methods": ["*"],
                "allow_headers": ["*"],
            }
        else:
            return {
                "allow_origins": self.allowed_origins,
                "allow_credentials": True,
                "allow_methods": ["*"],
                "allow_headers": ["*"],
            }

    def get_socketio_cors_origins(self):
        """
        Get CORS origins for Socket.IO

        Socket.IO doesn't support regex patterns, so we need to provide an
        explicit list. For production, this means enumerating common origins.
        """
        if self.allow_all:
            return "*"

        socketio_origins = self.allowed_origins.copy()

        # Enumerate well-known production origins that the regex patterns
        # would otherwise cover (Socket.IO cannot evaluate regexes).
        if self.environment in [Environment.PRODUCTION, Environment.STAGING]:
            production_origins = [
                "https://app.tafiti.dev",
                "https://www.tafiti.dev",
                "https://polyglot.tafiti.dev",
                "https://podium.tafiti.dev",
                "https://milimani.tafiti-api.org",
                "https://polyglot-ashy-beta.vercel.app",
                "https://lessons-silk.vercel.app",
                # BUGFIX: this origin previously ended with an invisible
                # U+2060 WORD JOINER character, so it never matched a real
                # Origin header.
                "https://lessons.tafiti.dev",
                "https://podium-chi.vercel.app",
            ]
            for origin in production_origins:
                if origin not in socketio_origins:
                    socketio_origins.append(origin)

        return socketio_origins

    def print_config_summary(self):
        """Print CORS configuration summary for debugging"""
        print("\n" + "="*70)
        print("CORS CONFIGURATION SUMMARY")
        print("="*70)
        print(f"Environment: {self.environment.value}")
        print(f"Allow All: {self.allow_all}")
        print(f"\nExplicit Origins ({len(self.allowed_origins)}):")
        for origin in self.allowed_origins:
            print(f" • {origin}")

        if self.origin_patterns:
            print(f"\nOrigin Patterns ({len(self.origin_patterns)}):")
            for pattern in self.origin_patterns:
                print(f" • {pattern.pattern}")

        print("\nExample Origins That Would Be Allowed:")
        test_origins = [
            "http://localhost:3001",
            "http://localhost:3002",
            "http://localhost:3003",
            "http://localhost:3004",
            "http://localhost:5173",
            "https://app.tafiti.dev",
            "https://polyglot.tafiti.dev",
            "https://podium.tafiti.dev",
            "https://polyglot.vercel.app",
            "https://lessons-silk.vercel.app",
            "https://podium-chi.vercel.app",
            "https://polyglot-ashy-beta.vercel.app",
            "https://mutisya-translator.hf.space",
            "https://milimani.tafiti-api.org",
            "capacitor://localhost",
            "https://example.com",
        ]

        for test_origin in test_origins:
            allowed = "✓" if self.is_origin_allowed(test_origin) else "✗"
            print(f" {allowed} {test_origin}")

        print("="*70 + "\n")
292
-
293
-
294
# Global CORS configuration instance
# Built once at import time; import this singleton rather than constructing
# CORSConfig directly so every consumer shares the same parsed settings.
cors_config = CORSConfig()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/main.py DELETED
@@ -1,345 +0,0 @@
1
- import os
2
- import os
3
- import asyncio
4
- from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, Depends
5
- from fastapi.middleware.cors import CORSMiddleware
6
- from fastapi.staticfiles import StaticFiles
7
- from contextlib import asynccontextmanager
8
- import logging
9
- import socketio
10
- import engineio
11
- import re
12
-
13
- from app.routers import sessions, mobile, watch, learning
14
- from app.services.session_manager import SessionManager
15
- from app.services.transcription_service import TranscriptionService
16
- from app.services.translation_service import TranslationService
17
- from app.services.tts_service import TTSService
18
- from app.services.websocket_manager import WebSocketManager
19
- from app.auth import require_hf_token, optional_hf_token, authenticate_websocket_connect, authenticate_websocket_auth_data
20
- from app.config.cors import cors_config
21
-
22
- class ChunkArrayTruncateFilter(logging.Filter):
23
- """Custom logging filter to truncate long arrays in Socket.IO logs for better readability"""
24
-
25
- def filter(self, record):
26
- if hasattr(record, 'msg') and isinstance(record.msg, str):
27
- # More aggressive approach to truncate audioData arrays
28
- # Pattern to match: "audioData":[numbers,numbers,numbers,...]
29
- audiodata_pattern = r'"audioData":\[([0-9,-]+(?:,[0-9,-]+)*)\]'
30
-
31
- def truncate_audiodata(match):
32
- array_content = match.group(1)
33
- # Split by comma and get first 10 items
34
- items = array_content.split(',')
35
- if len(items) > 10:
36
- truncated = ','.join(items[:10])
37
- return f'"audioData":[{truncated}, ...] (truncated {len(items)-10} more items)'
38
- return match.group(0)
39
-
40
- record.msg = re.sub(audiodata_pattern, truncate_audiodata, record.msg)
41
-
42
- # Also handle any other large numeric arrays in brackets
43
- # Pattern for arrays with more than 20 numbers
44
- large_numeric_array_pattern = r'(\[)([0-9,-]+(?:,[0-9,-]+){20,})(\])'
45
-
46
- def truncate_large_numeric_array(match):
47
- prefix = match.group(1)
48
- array_content = match.group(2)
49
- suffix = match.group(3)
50
-
51
- # Split by comma and get first 10 items
52
- items = array_content.split(',')
53
- if len(items) > 10:
54
- truncated = ','.join(items[:10])
55
- return f'{prefix}{truncated}, ... (truncated {len(items)-10} more){suffix}'
56
- return match.group(0)
57
-
58
- record.msg = re.sub(large_numeric_array_pattern, truncate_large_numeric_array, record.msg)
59
-
60
- # Truncate other field types
61
- for field_name in ['chunk', 'wavChunk', 'data']:
62
- field_pattern = rf'"{field_name}":\[([0-9,-]+(?:,[0-9,-]+)*)\]'
63
- def make_truncate_field(fname):
64
- def truncate_field(match):
65
- array_content = match.group(1)
66
- items = array_content.split(',')
67
- if len(items) > 10:
68
- truncated = ','.join(items[:10])
69
- return f'"{fname}":[{truncated}, ...] (truncated {len(items)-10} more)'
70
- return match.group(0)
71
- return truncate_field
72
-
73
- record.msg = re.sub(field_pattern, make_truncate_field(field_name), record.msg)
74
-
75
- return True
76
-
77
-
78
- @asynccontextmanager
79
- async def lifespan(app: FastAPI):
80
- # Initialize services
81
- print("=== INITIALIZING BACKEND SERVICES ===")
82
- try:
83
- print("Initializing transcription service...")
84
- await transcription_service.initialize()
85
- print("✓ Transcription service initialized")
86
-
87
- print("Initializing translation service...")
88
- await translation_service.initialize()
89
- print("✓ Translation service initialized")
90
-
91
- print("Initializing TTS service...")
92
- await tts_service.initialize()
93
- print("✓ TTS service initialized")
94
-
95
- print("=== ALL SERVICES INITIALIZED SUCCESSFULLY ===" )
96
-
97
- # Start background loading of additional models after successful startup
98
- print("=== STARTING BACKGROUND MODEL LOADING ===")
99
- transcription_service.start_background_loading()
100
- tts_service.start_background_loading()
101
- print("=== BACKGROUND MODEL LOADING INITIATED ===")
102
-
103
- # Print CORS configuration summary
104
- cors_config.print_config_summary()
105
-
106
- except Exception as e:
107
- print(f"❌ SERVICE INITIALIZATION FAILED: {e}")
108
- import traceback
109
- traceback.print_exc()
110
- raise
111
-
112
- yield
113
-
114
- # Cleanup
115
- print("=== CLEANING UP SERVICES ===")
116
- await transcription_service.cleanup()
117
- await translation_service.cleanup()
118
- await tts_service.cleanup()
119
- print("=== CLEANUP COMPLETE ===")
120
-
121
- app = FastAPI(
122
- title="Real-time Transcription & Translation API",
123
- description="Backend API for real-time speech transcription and translation",
124
- version="1.0.0",
125
- lifespan=lifespan
126
- )
127
-
128
- # CORS middleware with environment-based configuration
129
- cors_middleware_config = cors_config.get_cors_middleware_config()
130
- print(f"Configuring CORS middleware with keys: {list(cors_middleware_config.keys())}")
131
-
132
- app.add_middleware(
133
- CORSMiddleware,
134
- **cors_middleware_config
135
- )
136
-
137
- # Initialize services - using PyTorch models for better compatibility
138
- session_manager = SessionManager()
139
- transcription_service = TranscriptionService()
140
- translation_service = TranslationService()
141
- tts_service = TTSService()
142
- websocket_manager = WebSocketManager(
143
- session_manager=session_manager,
144
- transcription_service=transcription_service,
145
- translation_service=translation_service,
146
- tts_service=tts_service
147
- )
148
-
149
- # Include routers
150
- app.include_router(sessions.router, prefix="/api")
151
- app.include_router(mobile.router, prefix="/api")
152
- app.include_router(watch.router, prefix="/api")
153
- app.include_router(learning.router)
154
-
155
- # Set the session manager in the router
156
- sessions.session_manager = session_manager
157
- sessions.translation_service = translation_service
158
- sessions.tts_service = tts_service
159
- sessions.transcription_service = transcription_service
160
-
161
- # Set the mobile router
162
- mobile.translation_service = translation_service
163
- mobile.tts_service = tts_service
164
- mobile.transcription_service = transcription_service
165
-
166
- # Set the watch router
167
- watch.translation_service = translation_service
168
- watch.tts_service = tts_service
169
- watch.transcription_service = transcription_service
170
-
171
-
172
- # Configure logging with custom filter to truncate chunk arrays
173
- chunk_filter = ChunkArrayTruncateFilter()
174
-
175
- sio_logger = logging.getLogger('socketio')
176
- sio_logger.setLevel(logging.INFO) # Show info logs with truncated arrays
177
- sio_logger.addFilter(chunk_filter)
178
-
179
- engineio_logger = logging.getLogger('engineio')
180
- engineio_logger.setLevel(logging.INFO) # Show info logs with truncated arrays
181
- engineio_logger.addFilter(chunk_filter)
182
-
183
- # Also apply filter to the root logger to catch any other verbose logging
184
- root_logger = logging.getLogger()
185
- root_logger.addFilter(chunk_filter)
186
-
187
- # Configure Engine.IO payload limits for large audio chunks
188
- engineio.payload.Payload.max_decode_packets = 250
189
-
190
- # Socket.IO setup with environment-based CORS
191
- socketio_cors_origins = cors_config.get_socketio_cors_origins()
192
- print(f"Configuring Socket.IO CORS: {len(socketio_cors_origins) if isinstance(socketio_cors_origins, list) else 'all'} origins")
193
-
194
- sio = socketio.AsyncServer(
195
- async_mode='asgi',
196
- cors_allowed_origins=socketio_cors_origins,
197
- cors_credentials=not cors_config.allow_all, # Cannot use credentials with wildcard
198
- logger=True, # Re-enabled with custom filtering
199
- engineio_logger=True, # Re-enabled with custom filtering
200
- always_connect=False # This ensures connect event is called for authentication
201
- )
202
-
203
- # Set the socketio instance in websocket manager
204
- websocket_manager.set_socketio(sio)
205
-
206
- socket_app = socketio.ASGIApp(sio, app)
207
-
208
- @app.get("/health")
209
- async def health_check(token: str = Depends(optional_hf_token)):
210
- """Health check endpoint - optionally authenticated"""
211
- from app.auth import hf_auth
212
-
213
- auth_status = "bypassed (local development)" if hf_auth.is_local else "authenticated"
214
- if not hf_auth.is_local and not token:
215
- auth_status = "unauthenticated"
216
-
217
- return {
218
- "status": "healthy",
219
- "message": "Translation service is running",
220
- "auth_status": auth_status,
221
- "local_development": hf_auth.is_local,
222
- "auth_bypassed": hf_auth.is_local,
223
- "token_prefix": token[:10] + "..." if token and token != "local-development-bypass" else "local-bypass" if hf_auth.is_local else None,
224
- "environment": {
225
- "ENVIRONMENT": os.getenv('ENVIRONMENT', 'not set'),
226
- "DEBUG": os.getenv('DEBUG', 'not set'),
227
- "DISABLE_AUTH": os.getenv('DISABLE_AUTH', 'not set'),
228
- "HOST": os.getenv('HOST', 'not set'),
229
- "PORT": os.getenv('PORT', 'not set')
230
- },
231
- "services": {
232
- "transcription": transcription_service is not None,
233
- "translation": translation_service is not None,
234
- "tts": tts_service is not None,
235
- "sessions": session_manager is not None
236
- }
237
- }
238
-
239
- @sio.event
240
- async def connect(sid, environ=None, auth=None):
241
- """Handle Socket.IO connection with authentication"""
242
- try:
243
- print(f"=== WEBSOCKET CONNECTION ATTEMPT ===")
244
- print(f"SID: {sid}")
245
- print(f"Auth data: {auth}")
246
- print(f"Environ type: {type(environ)}")
247
- print(f"Environ data: {environ}")
248
-
249
- # Ensure environ is a dict
250
- if environ is None:
251
- environ = {}
252
-
253
- print(f"Query string: {environ.get('QUERY_STRING', 'None')}")
254
- print(f"Headers: {[k for k in environ.keys() if k.startswith('HTTP_')] if isinstance(environ, dict) else 'environ not dict'}")
255
-
256
- # Check authentication from multiple sources
257
- authenticated = False
258
- auth_method = None
259
-
260
- # Method 1: Check auth data from client
261
- if auth and authenticate_websocket_auth_data(auth):
262
- authenticated = True
263
- auth_method = "auth_data"
264
- print("✓ Authenticated via auth data")
265
-
266
- # Method 2: Check environment (headers, query params)
267
- elif environ and isinstance(environ, dict) and authenticate_websocket_connect(environ):
268
- authenticated = True
269
- auth_method = "environ"
270
- print("✓ Authenticated via headers/query")
271
-
272
- # TEMPORARY: Allow connections for debugging (remove in production)
273
- # This helps identify if the issue is authentication or something else
274
- if not authenticated:
275
- print("⚠️ Authentication failed, but allowing for debugging")
276
- if isinstance(environ, dict):
277
- print(f"Available environ keys: {list(environ.keys())}")
278
- # Uncomment the next line to temporarily allow unauthenticated connections for debugging
279
- authenticated = True
280
- auth_method = "debug_bypass"
281
-
282
- if not authenticated:
283
- print("❌ Authentication failed - disconnecting")
284
- await sio.disconnect(sid)
285
- return False
286
-
287
- print(f"✓ WebSocket connection authenticated successfully via {auth_method}")
288
- return True
289
-
290
- except Exception as e:
291
- print(f"❌ Error in connect handler: {e}")
292
- import traceback
293
- traceback.print_exc()
294
- try:
295
- await sio.disconnect(sid)
296
- except:
297
- pass
298
- return False
299
-
300
- @sio.event
301
- async def disconnect(sid):
302
- await websocket_manager.handle_disconnect(sid)
303
-
304
- @sio.event
305
- async def join_session(sid, data):
306
- await websocket_manager.handle_join_session(sid, data)
307
-
308
- @sio.event
309
- async def join_hub(sid, data):
310
- await websocket_manager.handle_join_hub(sid, data)
311
-
312
- @sio.event
313
- async def leave_session(sid, data):
314
- await websocket_manager.handle_leave_session(sid, data)
315
-
316
- @sio.event
317
- async def audio_chunk(sid, data):
318
- await websocket_manager.handle_audio_chunk(sid, data)
319
-
320
- @sio.event
321
- async def speaking_status(sid, data):
322
- await websocket_manager.handle_speaking_status(sid, data)
323
-
324
- @sio.event
325
- async def test_echo(sid, data):
326
- """Test event to verify WebSocket communication"""
327
- await sio.emit('test_echo_response', data, room=sid)
328
-
329
- @sio.event
330
- async def update_participant_language(sid, data):
331
- """Update participant's language (affects speech recognition)"""
332
- await websocket_manager.handle_update_participant_language(sid, data)
333
-
334
- @sio.event
335
- async def update_session_languages(sid, data):
336
- """Update session's languages (affects translation targets)"""
337
- await websocket_manager.handle_update_session_languages(sid, data)
338
-
339
- # Serve static files (for frontend)
340
- if os.path.exists("../frontend/dist"):
341
- app.mount("/", StaticFiles(directory="../frontend/dist", html=True), name="static")
342
-
343
- if __name__ == "__main__":
344
- import uvicorn
345
- uvicorn.run("main:socket_app", host="0.0.0.0", port=7860, reload=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/main.py.bak DELETED
@@ -1,345 +0,0 @@
1
- import os
2
- import os
3
- import asyncio
4
- from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, Depends
5
- from fastapi.middleware.cors import CORSMiddleware
6
- from fastapi.staticfiles import StaticFiles
7
- from contextlib import asynccontextmanager
8
- import logging
9
- import socketio
10
- import engineio
11
- import re
12
-
13
- from app.routers import sessions, mobile, watch, learning
14
- from app.services.session_manager import SessionManager
15
- from app.services.transcription_service import TranscriptionService
16
- from app.services.translation_service import TranslationService
17
- from app.services.tts_service import TTSService
18
- from app.services.websocket_manager import WebSocketManager
19
- from app.auth import require_hf_token, optional_hf_token, authenticate_websocket_connect, authenticate_websocket_auth_data
20
- from app.config.cors import cors_config
21
-
22
- class ChunkArrayTruncateFilter(logging.Filter):
23
- """Custom logging filter to truncate long arrays in Socket.IO logs for better readability"""
24
-
25
- def filter(self, record):
26
- if hasattr(record, 'msg') and isinstance(record.msg, str):
27
- # More aggressive approach to truncate audioData arrays
28
- # Pattern to match: "audioData":[numbers,numbers,numbers,...]
29
- audiodata_pattern = r'"audioData":\[([0-9,-]+(?:,[0-9,-]+)*)\]'
30
-
31
- def truncate_audiodata(match):
32
- array_content = match.group(1)
33
- # Split by comma and get first 10 items
34
- items = array_content.split(',')
35
- if len(items) > 10:
36
- truncated = ','.join(items[:10])
37
- return f'"audioData":[{truncated}, ...] (truncated {len(items)-10} more items)'
38
- return match.group(0)
39
-
40
- record.msg = re.sub(audiodata_pattern, truncate_audiodata, record.msg)
41
-
42
- # Also handle any other large numeric arrays in brackets
43
- # Pattern for arrays with more than 20 numbers
44
- large_numeric_array_pattern = r'(\[)([0-9,-]+(?:,[0-9,-]+){20,})(\])'
45
-
46
- def truncate_large_numeric_array(match):
47
- prefix = match.group(1)
48
- array_content = match.group(2)
49
- suffix = match.group(3)
50
-
51
- # Split by comma and get first 10 items
52
- items = array_content.split(',')
53
- if len(items) > 10:
54
- truncated = ','.join(items[:10])
55
- return f'{prefix}{truncated}, ... (truncated {len(items)-10} more){suffix}'
56
- return match.group(0)
57
-
58
- record.msg = re.sub(large_numeric_array_pattern, truncate_large_numeric_array, record.msg)
59
-
60
- # Truncate other field types
61
- for field_name in ['chunk', 'wavChunk', 'data']:
62
- field_pattern = rf'"{field_name}":\[([0-9,-]+(?:,[0-9,-]+)*)\]'
63
- def make_truncate_field(fname):
64
- def truncate_field(match):
65
- array_content = match.group(1)
66
- items = array_content.split(',')
67
- if len(items) > 10:
68
- truncated = ','.join(items[:10])
69
- return f'"{fname}":[{truncated}, ...] (truncated {len(items)-10} more)'
70
- return match.group(0)
71
- return truncate_field
72
-
73
- record.msg = re.sub(field_pattern, make_truncate_field(field_name), record.msg)
74
-
75
- return True
76
-
77
-
78
- @asynccontextmanager
79
- async def lifespan(app: FastAPI):
80
- # Initialize services
81
- print("=== INITIALIZING BACKEND SERVICES ===")
82
- try:
83
- print("Initializing transcription service...")
84
- await transcription_service.initialize()
85
- print("✓ Transcription service initialized")
86
-
87
- print("Initializing translation service...")
88
- await translation_service.initialize()
89
- print("✓ Translation service initialized")
90
-
91
- print("Initializing TTS service...")
92
- await tts_service.initialize()
93
- print("✓ TTS service initialized")
94
-
95
- print("=== ALL SERVICES INITIALIZED SUCCESSFULLY ===")
96
-
97
- # Start background loading of additional models after successful startup
98
- print("=== STARTING BACKGROUND MODEL LOADING ===")
99
- transcription_service.start_background_loading()
100
- tts_service.start_background_loading()
101
- print("=== BACKGROUND MODEL LOADING INITIATED ===")
102
-
103
- # Print CORS configuration summary
104
- cors_config.print_config_summary()
105
-
106
- except Exception as e:
107
- print(f"❌ SERVICE INITIALIZATION FAILED: {e}")
108
- import traceback
109
- traceback.print_exc()
110
- raise
111
-
112
- yield
113
-
114
- # Cleanup
115
- print("=== CLEANING UP SERVICES ===")
116
- await transcription_service.cleanup()
117
- await translation_service.cleanup()
118
- await tts_service.cleanup()
119
- print("=== CLEANUP COMPLETE ===")
120
-
121
- app = FastAPI(
122
- title="Real-time Transcription & Translation API",
123
- description="Backend API for real-time speech transcription and translation",
124
- version="1.0.0",
125
- lifespan=lifespan
126
- )
127
-
128
- # CORS middleware with environment-based configuration
129
- cors_middleware_config = cors_config.get_cors_middleware_config()
130
- print(f"Configuring CORS middleware with keys: {list(cors_middleware_config.keys())}")
131
-
132
- app.add_middleware(
133
- CORSMiddleware,
134
- **cors_middleware_config
135
- )
136
-
137
- # Initialize services - using PyTorch models for better compatibility
138
- session_manager = SessionManager()
139
- transcription_service = TranscriptionService()
140
- translation_service = TranslationService()
141
- tts_service = TTSService()
142
- websocket_manager = WebSocketManager(
143
- session_manager=session_manager,
144
- transcription_service=transcription_service,
145
- translation_service=translation_service,
146
- tts_service=tts_service
147
- )
148
-
149
- # Include routers
150
- app.include_router(sessions.router, prefix="/api")
151
- app.include_router(mobile.router, prefix="/api")
152
- app.include_router(watch.router, prefix="/api")
153
- app.include_router(learning.router)
154
-
155
- # Set the session manager in the router
156
- sessions.session_manager = session_manager
157
- sessions.translation_service = translation_service
158
- sessions.tts_service = tts_service
159
- sessions.transcription_service = transcription_service
160
-
161
- # Set the mobile router
162
- mobile.translation_service = translation_service
163
- mobile.tts_service = tts_service
164
- mobile.transcription_service = transcription_service
165
-
166
- # Set the watch router
167
- watch.translation_service = translation_service
168
- watch.tts_service = tts_service
169
- watch.transcription_service = transcription_service
170
-
171
-
172
- # Configure logging with custom filter to truncate chunk arrays
173
- chunk_filter = ChunkArrayTruncateFilter()
174
-
175
- sio_logger = logging.getLogger('socketio')
176
- sio_logger.setLevel(logging.INFO) # Show info logs with truncated arrays
177
- sio_logger.addFilter(chunk_filter)
178
-
179
- engineio_logger = logging.getLogger('engineio')
180
- engineio_logger.setLevel(logging.INFO) # Show info logs with truncated arrays
181
- engineio_logger.addFilter(chunk_filter)
182
-
183
- # Also apply filter to the root logger to catch any other verbose logging
184
- root_logger = logging.getLogger()
185
- root_logger.addFilter(chunk_filter)
186
-
187
- # Configure Engine.IO payload limits for large audio chunks
188
- engineio.payload.Payload.max_decode_packets = 250
189
-
190
- # Socket.IO setup with environment-based CORS
191
- socketio_cors_origins = cors_config.get_socketio_cors_origins()
192
- print(f"Configuring Socket.IO CORS: {len(socketio_cors_origins) if isinstance(socketio_cors_origins, list) else 'all'} origins")
193
-
194
- sio = socketio.AsyncServer(
195
- async_mode='asgi',
196
- cors_allowed_origins=socketio_cors_origins,
197
- cors_credentials=not cors_config.allow_all, # Cannot use credentials with wildcard
198
- logger=True, # Re-enabled with custom filtering
199
- engineio_logger=True, # Re-enabled with custom filtering
200
- always_connect=False # This ensures connect event is called for authentication
201
- )
202
-
203
- # Set the socketio instance in websocket manager
204
- websocket_manager.set_socketio(sio)
205
-
206
- socket_app = socketio.ASGIApp(sio, app)
207
-
208
- @app.get("/health")
209
- async def health_check(token: str = Depends(optional_hf_token)):
210
- """Health check endpoint - optionally authenticated"""
211
- from app.auth import hf_auth
212
-
213
- auth_status = "bypassed (local development)" if hf_auth.is_local else "authenticated"
214
- if not hf_auth.is_local and not token:
215
- auth_status = "unauthenticated"
216
-
217
- return {
218
- "status": "healthy",
219
- "message": "Translation service is running",
220
- "auth_status": auth_status,
221
- "local_development": hf_auth.is_local,
222
- "auth_bypassed": hf_auth.is_local,
223
- "token_prefix": token[:10] + "..." if token and token != "local-development-bypass" else "local-bypass" if hf_auth.is_local else None,
224
- "environment": {
225
- "ENVIRONMENT": os.getenv('ENVIRONMENT', 'not set'),
226
- "DEBUG": os.getenv('DEBUG', 'not set'),
227
- "DISABLE_AUTH": os.getenv('DISABLE_AUTH', 'not set'),
228
- "HOST": os.getenv('HOST', 'not set'),
229
- "PORT": os.getenv('PORT', 'not set')
230
- },
231
- "services": {
232
- "transcription": transcription_service is not None,
233
- "translation": translation_service is not None,
234
- "tts": tts_service is not None,
235
- "sessions": session_manager is not None
236
- }
237
- }
238
-
239
- @sio.event
240
- async def connect(sid, environ=None, auth=None):
241
- """Handle Socket.IO connection with authentication"""
242
- try:
243
- print(f"=== WEBSOCKET CONNECTION ATTEMPT ===")
244
- print(f"SID: {sid}")
245
- print(f"Auth data: {auth}")
246
- print(f"Environ type: {type(environ)}")
247
- print(f"Environ data: {environ}")
248
-
249
- # Ensure environ is a dict
250
- if environ is None:
251
- environ = {}
252
-
253
- print(f"Query string: {environ.get('QUERY_STRING', 'None')}")
254
- print(f"Headers: {[k for k in environ.keys() if k.startswith('HTTP_')] if isinstance(environ, dict) else 'environ not dict'}")
255
-
256
- # Check authentication from multiple sources
257
- authenticated = False
258
- auth_method = None
259
-
260
- # Method 1: Check auth data from client
261
- if auth and authenticate_websocket_auth_data(auth):
262
- authenticated = True
263
- auth_method = "auth_data"
264
- print("✓ Authenticated via auth data")
265
-
266
- # Method 2: Check environment (headers, query params)
267
- elif environ and isinstance(environ, dict) and authenticate_websocket_connect(environ):
268
- authenticated = True
269
- auth_method = "environ"
270
- print("✓ Authenticated via headers/query")
271
-
272
- # TEMPORARY: Allow connections for debugging (remove in production)
273
- # This helps identify if the issue is authentication or something else
274
- if not authenticated:
275
- print("⚠️ Authentication failed, but allowing for debugging")
276
- if isinstance(environ, dict):
277
- print(f"Available environ keys: {list(environ.keys())}")
278
- # Uncomment the next line to temporarily allow unauthenticated connections for debugging
279
- authenticated = True
280
- auth_method = "debug_bypass"
281
-
282
- if not authenticated:
283
- print("❌ Authentication failed - disconnecting")
284
- await sio.disconnect(sid)
285
- return False
286
-
287
- print(f"✓ WebSocket connection authenticated successfully via {auth_method}")
288
- return True
289
-
290
- except Exception as e:
291
- print(f"❌ Error in connect handler: {e}")
292
- import traceback
293
- traceback.print_exc()
294
- try:
295
- await sio.disconnect(sid)
296
- except:
297
- pass
298
- return False
299
-
300
- @sio.event
301
- async def disconnect(sid):
302
- await websocket_manager.handle_disconnect(sid)
303
-
304
- @sio.event
305
- async def join_session(sid, data):
306
- await websocket_manager.handle_join_session(sid, data)
307
-
308
- @sio.event
309
- async def join_hub(sid, data):
310
- await websocket_manager.handle_join_hub(sid, data)
311
-
312
- @sio.event
313
- async def leave_session(sid, data):
314
- await websocket_manager.handle_leave_session(sid, data)
315
-
316
- @sio.event
317
- async def audio_chunk(sid, data):
318
- await websocket_manager.handle_audio_chunk(sid, data)
319
-
320
- @sio.event
321
- async def speaking_status(sid, data):
322
- await websocket_manager.handle_speaking_status(sid, data)
323
-
324
- @sio.event
325
- async def test_echo(sid, data):
326
- """Test event to verify WebSocket communication"""
327
- await sio.emit('test_echo_response', data, room=sid)
328
-
329
- @sio.event
330
- async def update_participant_language(sid, data):
331
- """Update participant's language (affects speech recognition)"""
332
- await websocket_manager.handle_update_participant_language(sid, data)
333
-
334
- @sio.event
335
- async def update_session_languages(sid, data):
336
- """Update session's languages (affects translation targets)"""
337
- await websocket_manager.handle_update_session_languages(sid, data)
338
-
339
- # Serve static files (for frontend)
340
- if os.path.exists("../frontend/dist"):
341
- app.mount("/", StaticFiles(directory="../frontend/dist", html=True), name="static")
342
-
343
- if __name__ == "__main__":
344
- import uvicorn
345
- uvicorn.run("main:socket_app", host="0.0.0.0", port=7860, reload=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/models/__init__.py DELETED
@@ -1,77 +0,0 @@
1
- from pydantic import BaseModel, Field
2
- from typing import List, Dict, Optional
3
- from enum import Enum
4
-
5
- class LanguageCode(str, Enum):
6
- ENGLISH = "eng"
7
- SWAHILI = "swa"
8
- KIKUYU = "kik"
9
- KAMBA = "kam"
10
- KIMERU = "mer"
11
- LUO = "luo"
12
- SOMALI = "som"
13
-
14
- class Language(BaseModel):
15
- code: LanguageCode
16
- name: str
17
- display_name: str
18
-
19
- class ParticipantCreate(BaseModel):
20
- name: str
21
- language: LanguageCode
22
-
23
- class Participant(BaseModel):
24
- id: str
25
- name: str
26
- language: Language
27
- is_organizer: bool = False
28
- is_speaking: bool = False
29
- is_connected: bool = False
30
-
31
- class SessionCreate(BaseModel):
32
- name: str
33
- organizer_name: str
34
- languages: List[LanguageCode]
35
- enable_tts: bool = True # Enable TTS by default for backward compatibility
36
-
37
- class Session(BaseModel):
38
- id: str
39
- name: str
40
- organizer_name: str
41
- participants: List[Participant] = []
42
- languages: List[Language] = []
43
- qr_code_url: Optional[str] = None
44
- is_active: bool = True
45
- enable_tts: bool = True # TTS enabled by default
46
-
47
- class Message(BaseModel):
48
- id: str
49
- session_id: str
50
- speaker_id: str
51
- speaker_name: str
52
- original_text: str
53
- original_language: Language
54
- translations: Dict[str, str] = {}
55
- is_transcribing: bool = False
56
-
57
- class TranscriptionUpdate(BaseModel):
58
- message_id: str
59
- text: str
60
- is_complete: bool
61
- confidence: Optional[float] = None
62
-
63
- class TranslationUpdate(BaseModel):
64
- message_id: str
65
- target_language: LanguageCode
66
- translated_text: str
67
-
68
- class AudioChunk(BaseModel):
69
- session_id: str
70
- participant_id: str
71
- audio_data: bytes
72
-
73
- class WebSocketMessage(BaseModel):
74
- type: str
75
- data: Dict
76
- session_id: str
77
- participant_id: Optional[str] = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/routers/__init__.py DELETED
@@ -1 +0,0 @@
1
- # Routers package
 
 
app/routers/add_phase_endpoints.py DELETED
@@ -1,490 +0,0 @@
1
- # Script to add remaining Phase 1-3 endpoints to learning.py
2
-
3
- endpoints_code = """
4
-
5
- @router.post("/vocabulary/add")
6
- async def add_vocabulary_to_practice(
7
- vocab_request: VocabularyAddRequest,
8
- request: Request,
9
- token: Optional[str] = Depends(optional_hf_token)
10
- ):
11
- \"\"\"Add a vocabulary word to user's practice queue with FSRS initialization\"\"\"
12
- try:
13
- user_id = token if token else 'anonymous'
14
-
15
- vocab = learning_service.get_vocabulary(vocab_request.vocab_id)
16
- if not vocab:
17
- raise HTTPException(status_code=404, detail="Vocabulary not found")
18
-
19
- fsrs_data = {
20
- 'difficulty': 0.3,
21
- 'stability': 2.5,
22
- 'retrievability': 1.0,
23
- 'review_count': 0,
24
- 'last_review': None,
25
- 'next_review': datetime.utcnow().isoformat() + 'Z',
26
- 'lapses': 0,
27
- 'state': 'new'
28
- }
29
-
30
- user_vocab = {
31
- 'vocabulary_id': vocab_request.vocab_id,
32
- 'swahili': vocab.get('swahili', ''),
33
- 'english': vocab.get('english', ''),
34
- 'part_of_speech': vocab.get('part_of_speech', 'unknown'),
35
- 'added_at': datetime.utcnow().isoformat() + 'Z',
36
- 'added_from': vocab_request.source_lesson_id,
37
- 'fsrs': fsrs_data,
38
- 'mastery_level': 0,
39
- 'times_reviewed': 0,
40
- 'times_correct': 0,
41
- 'accuracy': 0.0
42
- }
43
-
44
- success = learning_service.update_vocabulary_progress(
45
- user_id, str(vocab_request.vocab_id), user_vocab
46
- )
47
-
48
- if success:
49
- return {"success": True, "vocabulary": user_vocab}
50
- else:
51
- raise HTTPException(status_code=500, detail="Failed to add vocabulary")
52
- except HTTPException:
53
- raise
54
- except Exception as e:
55
- logger.error(f"Error adding vocabulary: {e}")
56
- raise HTTPException(status_code=500, detail="Failed to add vocabulary")
57
-
58
-
59
- def calculate_next_review_fsrs(fsrs: Dict, grade: int) -> Dict:
60
- \"\"\"Implement FSRS algorithm\"\"\"
61
- from datetime import timedelta
62
-
63
- difficulty = fsrs['difficulty']
64
- stability = fsrs['stability']
65
-
66
- if grade == 0:
67
- new_difficulty = min(difficulty + 0.2, 1.0)
68
- elif grade == 2:
69
- new_difficulty = min(difficulty + 0.1, 1.0)
70
- elif grade == 4:
71
- new_difficulty = max(difficulty - 0.1, 0.0)
72
- else:
73
- new_difficulty = difficulty
74
-
75
- if grade == 0:
76
- new_stability = stability * 0.5
77
- state = 'relearning'
78
- interval_minutes = 10
79
- elif grade == 2:
80
- new_stability = stability * 1.2
81
- state = 'review'
82
- interval_minutes = int(new_stability * 24 * 60)
83
- elif grade == 3:
84
- new_stability = stability * 2.5
85
- state = 'review'
86
- interval_minutes = int(new_stability * 24 * 60)
87
- else:
88
- new_stability = stability * 4.0
89
- state = 'review'
90
- interval_minutes = int(new_stability * 24 * 60)
91
-
92
- next_review = datetime.utcnow() + timedelta(minutes=interval_minutes)
93
-
94
- return {
95
- 'difficulty': new_difficulty,
96
- 'stability': new_stability,
97
- 'retrievability': 0.9 if grade >= 2 else 0.0,
98
- 'review_count': fsrs['review_count'] + 1,
99
- 'last_review': datetime.utcnow().isoformat() + 'Z',
100
- 'next_review': next_review.isoformat() + 'Z',
101
- 'lapses': fsrs['lapses'],
102
- 'state': state,
103
- 'interval_days': interval_minutes / (24 * 60)
104
- }
105
-
106
-
107
- def calculate_mastery_level(vocab: Dict) -> int:
108
- \"\"\"Calculate mastery level (0-5)\"\"\"
109
- accuracy = vocab['accuracy']
110
- reviews = vocab['times_reviewed']
111
- stability = vocab['fsrs']['stability']
112
-
113
- if reviews == 0:
114
- return 0
115
- elif reviews < 5 or accuracy < 70:
116
- return 1
117
- elif reviews < 10 or accuracy < 85:
118
- return 2
119
- elif reviews < 20 or accuracy < 95:
120
- return 3
121
- elif reviews >= 20 and accuracy >= 95 and stability >= 30:
122
- return 4
123
- elif reviews >= 40 and accuracy >= 98 and stability >= 90:
124
- return 5
125
- else:
126
- return 3
127
-
128
-
129
- @router.post("/vocabulary/review")
130
- async def record_vocabulary_review_fsrs(
131
- review_request: VocabularyReviewRequest,
132
- request: Request,
133
- token: Optional[str] = Depends(optional_hf_token)
134
- ):
135
- \"\"\"Record vocabulary review and update FSRS parameters\"\"\"
136
- try:
137
- user_id = token if token else 'anonymous'
138
- progress = learning_service.get_user_progress(user_id)
139
-
140
- if not progress or str(review_request.vocab_id) not in progress.get('vocabulary_progress', {}):
141
- raise HTTPException(status_code=404, detail="Vocabulary not in practice queue")
142
-
143
- vocab = progress['vocabulary_progress'][str(review_request.vocab_id)]
144
- fsrs = vocab['fsrs']
145
-
146
- grade_map = {'again': 0, 'hard': 2, 'good': 3, 'easy': 4}
147
- grade = grade_map.get(review_request.rating, 3)
148
-
149
- new_fsrs = calculate_next_review_fsrs(fsrs, grade)
150
-
151
- vocab['fsrs'] = new_fsrs
152
- vocab['times_reviewed'] += 1
153
- if grade >= 2:
154
- vocab['times_correct'] += 1
155
- else:
156
- vocab['fsrs']['lapses'] += 1
157
-
158
- vocab['accuracy'] = (vocab['times_correct'] / vocab['times_reviewed']) * 100 if vocab['times_reviewed'] > 0 else 0
159
- vocab['mastery_level'] = calculate_mastery_level(vocab)
160
- vocab['last_reviewed_at'] = datetime.utcnow().isoformat() + 'Z'
161
-
162
- if 'vocabulary_reviewed' not in progress['overall_stats']:
163
- progress['overall_stats']['vocabulary_reviewed'] = 0
164
- progress['overall_stats']['vocabulary_reviewed'] += 1
165
-
166
- learning_service.save_user_progress(user_id, progress)
167
-
168
- return {
169
- "success": True,
170
- "vocabulary": vocab,
171
- "next_review": new_fsrs['next_review'],
172
- "interval_days": new_fsrs['interval_days']
173
- }
174
- except HTTPException:
175
- raise
176
- except Exception as e:
177
- logger.error(f"Error recording vocabulary review: {e}")
178
- raise HTTPException(status_code=500, detail="Failed to record review")
179
-
180
-
181
- @router.get("/vocabulary/stats")
182
- async def get_vocabulary_stats(
183
- request: Request,
184
- token: Optional[str] = Depends(optional_hf_token)
185
- ):
186
- \"\"\"Get vocabulary mastery statistics\"\"\"
187
- try:
188
- user_id = token if token else 'anonymous'
189
- progress = learning_service.get_user_progress(user_id)
190
-
191
- if not progress:
192
- return {
193
- "total_words": 0,
194
- "in_practice": 0,
195
- "mastery_breakdown": {str(i): 0 for i in range(6)},
196
- "average_accuracy": 0
197
- }
198
-
199
- vocab_progress = progress.get('vocabulary_progress', {})
200
- mastery_breakdown = {str(i): 0 for i in range(6)}
201
- total_accuracy = 0
202
- total_with_reviews = 0
203
-
204
- for vocab_data in vocab_progress.values():
205
- level = vocab_data.get('mastery_level', 0)
206
- mastery_breakdown[str(level)] += 1
207
-
208
- if vocab_data.get('times_reviewed', 0) > 0:
209
- total_accuracy += vocab_data.get('accuracy', 0)
210
- total_with_reviews += 1
211
-
212
- avg_accuracy = total_accuracy / total_with_reviews if total_with_reviews > 0 else 0
213
-
214
- return {
215
- "total_words": len(vocab_progress),
216
- "in_practice": len(vocab_progress),
217
- "mastery_breakdown": mastery_breakdown,
218
- "average_accuracy": round(avg_accuracy, 1),
219
- "total_reviews": sum(v.get('times_reviewed', 0) for v in vocab_progress.values())
220
- }
221
- except Exception as e:
222
- logger.error(f"Error getting vocabulary stats: {e}")
223
- raise HTTPException(status_code=500, detail="Failed to get stats")
224
-
225
-
226
- @router.get("/vocabulary/library")
227
- async def get_vocabulary_library(
228
- lesson_id: Optional[int] = None,
229
- level: Optional[str] = None,
230
- search: Optional[str] = None,
231
- request: Request = None,
232
- token: Optional[str] = Depends(optional_hf_token)
233
- ):
234
- \"\"\"Browse all vocabulary with filters\"\"\"
235
- try:
236
- user_id = token if token else 'anonymous'
237
-
238
- all_vocab = learning_service.get_all_vocabulary()
239
- progress = learning_service.get_user_progress(user_id)
240
- user_vocab = progress.get('vocabulary_progress', {}) if progress else {}
241
-
242
- filtered_vocab = all_vocab
243
-
244
- if lesson_id:
245
- filtered_vocab = [v for v in filtered_vocab if v.get('lesson_id') == lesson_id]
246
-
247
- if level:
248
- filtered_vocab = [v for v in filtered_vocab if v.get('level') == level]
249
-
250
- if search:
251
- search_lower = search.lower()
252
- filtered_vocab = [v for v in filtered_vocab
253
- if search_lower in v.get('swahili', '').lower()
254
- or search_lower in v.get('english', '').lower()]
255
-
256
- for vocab in filtered_vocab:
257
- vocab_id = str(vocab.get('vocabulary_id') or vocab.get('id'))
258
- if vocab_id in user_vocab:
259
- vocab['status'] = 'practicing'
260
- vocab['mastery_level'] = user_vocab[vocab_id].get('mastery_level', 0)
261
- vocab['accuracy'] = user_vocab[vocab_id].get('accuracy', 0)
262
- vocab['next_review'] = user_vocab[vocab_id].get('fsrs', {}).get('next_review')
263
- else:
264
- vocab['status'] = 'not_practicing'
265
- vocab['mastery_level'] = 0
266
-
267
- return {
268
- "vocabulary": filtered_vocab,
269
- "total": len(filtered_vocab),
270
- "filters_applied": {
271
- "lesson_id": lesson_id,
272
- "level": level,
273
- "search": search
274
- }
275
- }
276
- except Exception as e:
277
- logger.error(f"Error getting vocabulary library: {e}")
278
- raise HTTPException(status_code=500, detail="Failed to get vocabulary")
279
-
280
-
281
- # Reading Comprehension
282
-
283
- class ComprehensionAnswer(BaseModel):
284
- question_id: str
285
- answer: str
286
-
287
-
288
- class ComprehensionSubmission(BaseModel):
289
- lesson_id: int
290
- passage_id: str
291
- answers: List[ComprehensionAnswer]
292
-
293
-
294
- @router.post("/comprehension/submit")
295
- async def submit_comprehension_answers(
296
- submission: ComprehensionSubmission,
297
- request: Request,
298
- token: Optional[str] = Depends(optional_hf_token)
299
- ):
300
- \"\"\"Submit reading comprehension answers and get scoring\"\"\"
301
- try:
302
- user_id = token if token else 'anonymous'
303
-
304
- lesson = learning_service.get_lesson(submission.lesson_id)
305
- if not lesson:
306
- raise HTTPException(status_code=404, detail="Lesson not found")
307
-
308
- passage = None
309
- for p in lesson.get('reading_passages', []):
310
- if p['passage_id'] == submission.passage_id:
311
- passage = p
312
- break
313
-
314
- if not passage:
315
- raise HTTPException(status_code=404, detail="Passage not found")
316
-
317
- results = []
318
- correct_count = 0
319
-
320
- for submitted in submission.answers:
321
- question_id = submitted.question_id
322
- user_answer = submitted.answer.strip().lower()
323
-
324
- question = None
325
- for q in passage['comprehension_questions']:
326
- if q['question_id'] == question_id:
327
- question = q
328
- break
329
-
330
- if not question:
331
- continue
332
-
333
- correct_answers = [ans.strip().lower() for ans in question.get('correct_answers', [])]
334
- is_correct = user_answer in correct_answers
335
-
336
- if is_correct:
337
- correct_count += 1
338
-
339
- results.append({
340
- "question_id": question_id,
341
- "correct": is_correct,
342
- "user_answer": user_answer,
343
- "correct_answer": question['correct_answers'][0] if correct_answers else None,
344
- "explanation": question.get('explanation')
345
- })
346
-
347
- score = (correct_count / len(submission.answers)) * 100 if submission.answers else 0
348
-
349
- progress = learning_service.get_user_progress(user_id)
350
- if not progress:
351
- progress = learning_service.create_default_progress(user_id)
352
-
353
- if 'comprehension_scores' not in progress:
354
- progress['comprehension_scores'] = {}
355
-
356
- progress['comprehension_scores'][f"{submission.lesson_id}_{submission.passage_id}"] = {
357
- "score": score,
358
- "completed_at": datetime.utcnow().isoformat() + 'Z',
359
- "attempts": progress['comprehension_scores'].get(f"{submission.lesson_id}_{submission.passage_id}", {}).get('attempts', 0) + 1
360
- }
361
-
362
- learning_service.save_user_progress(user_id, progress)
363
-
364
- return {
365
- "results": results,
366
- "score": round(score, 1),
367
- "correct": correct_count,
368
- "total": len(submission.answers)
369
- }
370
- except HTTPException:
371
- raise
372
- except Exception as e:
373
- logger.error(f"Error submitting comprehension: {e}")
374
- raise HTTPException(status_code=500, detail="Failed to submit comprehension")
375
-
376
-
377
- # Task Scenarios
378
-
379
- class ScenarioProgressUpdate(BaseModel):
380
- turn_id: str
381
- choice_id: str
382
-
383
-
384
- @router.get("/scenarios/{scenario_id}")
385
- async def get_scenario(
386
- scenario_id: str,
387
- request: Request,
388
- token: Optional[str] = Depends(optional_hf_token)
389
- ):
390
- \"\"\"Get task scenario with branching dialogue\"\"\"
391
- try:
392
- user_id = token if token else 'anonymous'
393
-
394
- scenario = learning_service.get_scenario(scenario_id)
395
- if not scenario:
396
- raise HTTPException(status_code=404, detail="Scenario not found")
397
-
398
- progress = learning_service.get_user_progress(user_id)
399
- scenario_progress = None
400
-
401
- if progress and 'scenario_progress' in progress:
402
- scenario_progress = progress['scenario_progress'].get(scenario_id)
403
-
404
- return {
405
- "scenario": scenario,
406
- "user_progress": scenario_progress
407
- }
408
- except HTTPException:
409
- raise
410
- except Exception as e:
411
- logger.error(f"Error getting scenario: {e}")
412
- raise HTTPException(status_code=500, detail="Failed to get scenario")
413
-
414
-
415
- @router.post("/scenarios/{scenario_id}/progress")
416
- async def update_scenario_progress(
417
- scenario_id: str,
418
- progress_update: ScenarioProgressUpdate,
419
- request: Request,
420
- token: Optional[str] = Depends(optional_hf_token)
421
- ):
422
- \"\"\"Update scenario progress with user choice\"\"\"
423
- try:
424
- user_id = token if token else 'anonymous'
425
-
426
- scenario = learning_service.get_scenario(scenario_id)
427
- if not scenario:
428
- raise HTTPException(status_code=404, detail="Scenario not found")
429
-
430
- progress = learning_service.get_user_progress(user_id)
431
- if not progress:
432
- progress = learning_service.create_default_progress(user_id)
433
-
434
- if 'scenario_progress' not in progress:
435
- progress['scenario_progress'] = {}
436
-
437
- if scenario_id not in progress['scenario_progress']:
438
- progress['scenario_progress'][scenario_id] = {
439
- "started_at": datetime.utcnow().isoformat() + 'Z',
440
- "turns": [],
441
- "completed": False
442
- }
443
-
444
- progress['scenario_progress'][scenario_id]['turns'].append({
445
- "turn_id": progress_update.turn_id,
446
- "choice_id": progress_update.choice_id,
447
- "timestamp": datetime.utcnow().isoformat() + 'Z'
448
- })
449
-
450
- turns_count = len(progress['scenario_progress'][scenario_id]['turns'])
451
- if turns_count >= scenario.get('required_turns', 6):
452
- progress['scenario_progress'][scenario_id]['completed'] = True
453
- progress['scenario_progress'][scenario_id]['completed_at'] = datetime.utcnow().isoformat() + 'Z'
454
-
455
- learning_service.save_user_progress(user_id, progress)
456
-
457
- return {
458
- "success": True,
459
- "progress": progress['scenario_progress'][scenario_id]
460
- }
461
- except HTTPException:
462
- raise
463
- except Exception as e:
464
- logger.error(f"Error updating scenario progress: {e}")
465
- raise HTTPException(status_code=500, detail="Failed to update scenario")
466
-
467
-
468
- @router.get("/scenarios")
469
- async def list_scenarios(
470
- request: Request,
471
- token: Optional[str] = Depends(optional_hf_token)
472
- ):
473
- \"\"\"Get list of all available scenarios\"\"\"
474
- try:
475
- scenarios = learning_service.get_all_scenarios()
476
- return {
477
- "success": True,
478
- "scenarios": scenarios,
479
- "total": len(scenarios)
480
- }
481
- except Exception as e:
482
- logger.error(f"Error listing scenarios: {e}")
483
- raise HTTPException(status_code=500, detail="Failed to list scenarios")
484
- """
485
-
486
# Append the generated endpoint code to learning.py.
# The target file is configurable via LEARNING_PY_PATH so this one-off codegen
# script is not tied to a single developer's machine; the historical absolute
# Windows path remains the default for backward compatibility.
import os

_target_path = os.environ.get(
    'LEARNING_PY_PATH',
    'C:/repos/polyglot/backend/app/routers/learning.py'
)
with open(_target_path, 'a', encoding='utf-8') as f:
    f.write(endpoints_code)

print("Successfully added all remaining Phase 1-3 endpoints!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/routers/learning.py DELETED
@@ -1,1020 +0,0 @@
1
- """
2
- Learning API Router - REST endpoints for language learning functionality
3
-
4
- Provides endpoints for:
5
- - Fetching lesson catalog and individual lessons
6
- - Managing user progress
7
- - Recording lesson completion and scores
8
- - Achievement tracking
9
- """
10
-
11
- from fastapi import APIRouter, HTTPException, Depends, Request, File, UploadFile
12
- from fastapi.responses import Response
13
- from pydantic import BaseModel
14
- from typing import List, Dict, Optional, Any
15
- from datetime import datetime
16
- import logging
17
- import io
18
-
19
- from app.services.learning_data_service import LearningDataService
20
- from app.auth import optional_hf_token
21
-
22
- logger = logging.getLogger(__name__)
23
-
24
- router = APIRouter(prefix="/api/learning", tags=["learning"])
25
-
26
- # Initialize data service
27
- learning_service = LearningDataService()
28
-
29
-
30
- # ==================== Request/Response Models ====================
31
-
32
class LessonProgressUpdate(BaseModel):
    """Request payload for recording progress on a single lesson.

    Fields left as None are simply not written into the stored record.
    """
    lesson_id: int
    status: str  # 'in_progress' or 'completed'
    score: Optional[int] = None
    pronunciation_score: Optional[float] = None
    listening_score: Optional[float] = None
    comprehension_score: Optional[float] = None
    time_spent_seconds: Optional[int] = None
    steps_completed: Optional[int] = None
    steps_skipped: Optional[int] = None
-
43
-
44
class VocabularyReview(BaseModel):
    """Request payload for one vocabulary practice attempt."""
    vocabulary_id: int
    swahili: str
    is_correct: bool
    mastery_level: Optional[int] = None  # when supplied, overwrites the stored level
49
-
50
-
51
class AchievementCheck(BaseModel):
    """Request payload reporting progress toward an achievement target."""
    achievement_id: str
    progress: int
    target: int  # achievement unlocks when progress >= target
55
-
56
-
57
- # ==================== Lesson Endpoints ====================
58
-
59
@router.get("/lessons")
async def get_lessons(language: Optional[str] = 'swahili', request: Request = None, token: Optional[str] = Depends(optional_hf_token)):
    """Return the lesson catalog for *language*.

    The response carries the lesson index metadata: lesson summaries, the
    configured learning paths, and catalog metadata. Responds 404 when no
    catalog exists for the language and 500 on unexpected service failures.
    """
    try:
        catalog = learning_service.get_lessons_index(language)
        if not catalog:
            raise HTTPException(status_code=404, detail=f"Lessons catalog not found for {language}")

        payload = {"success": True}
        payload["lessons"] = catalog.get('lessons', [])
        payload["learning_paths"] = catalog.get('learning_paths', {})
        payload["metadata"] = catalog.get('metadata', {})
        return payload
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error fetching lessons for {language}: {e}")
        raise HTTPException(status_code=500, detail="Failed to fetch lessons")
85
-
86
-
87
@router.get("/lessons/{lesson_id}")
async def get_lesson(lesson_id: int, language: Optional[str] = 'swahili', request: Request = None, token: Optional[str] = Depends(optional_hf_token)):
    """Return the full content of one lesson (vocabulary, dialogue, exercises).

    Responds 404 when the lesson does not exist for the requested language,
    500 on unexpected service failures.
    """
    try:
        found = learning_service.get_lesson(lesson_id, language)
        if found:
            return {"success": True, "lesson": found}
        raise HTTPException(status_code=404, detail=f"Lesson {lesson_id} not found for {language}")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error fetching lesson {lesson_id} for {language}: {e}")
        raise HTTPException(status_code=500, detail="Failed to fetch lesson")
110
-
111
-
112
- # ==================== User Progress Endpoints ====================
113
-
114
@router.get("/progress")
async def get_user_progress(request: Request, token: Optional[str] = Depends(optional_hf_token)):
    """Return the caller's full learning progress record.

    The record includes overall stats, per-lesson progress, vocabulary
    progress and achievements. Unauthenticated callers share the shared
    'anonymous' record.
    """
    try:
        user_id = token or 'anonymous'
        record = learning_service.get_user_progress(user_id)
        if not record:
            raise HTTPException(status_code=500, detail="Failed to load user progress")
        return {"success": True, "progress": record}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error fetching user progress: {e}")
        raise HTTPException(status_code=500, detail="Failed to fetch user progress")
138
-
139
-
140
@router.post("/progress/lesson")
async def update_lesson_progress(
    progress_update: LessonProgressUpdate,
    request: Request,
    token: Optional[str] = Depends(optional_hf_token)
):
    """
    Update progress for a specific lesson.

    Records completion status, scores, and time spent on a lesson. Optional
    fields omitted from the request are not written into the stored record.

    Raises:
        HTTPException: 500 when the progress record cannot be persisted.
    """
    try:
        user_id = token if token else 'anonymous'

        # Fix: the previous implementation re-read the entire user-progress
        # store twice per request (once for best_score, once for attempts).
        # Read it once and reuse the snapshot.
        user_progress = learning_service.get_user_progress(user_id)
        lesson_key = str(progress_update.lesson_id)
        existing = (user_progress or {}).get('lesson_progress', {}).get(lesson_key, {})

        update_data = {
            'lesson_id': progress_update.lesson_id,
            'status': progress_update.status
        }

        if progress_update.score is not None:
            update_data['latest_score'] = progress_update.score
            if user_progress:
                # Preserve the best score ever achieved for this lesson.
                update_data['best_score'] = max(existing.get('best_score', 0), progress_update.score)

        if progress_update.pronunciation_score is not None:
            update_data['pronunciation_score'] = progress_update.pronunciation_score

        if progress_update.listening_score is not None:
            update_data['listening_score'] = progress_update.listening_score

        if progress_update.comprehension_score is not None:
            update_data['comprehension_score'] = progress_update.comprehension_score

        if progress_update.time_spent_seconds is not None:
            update_data['time_spent_seconds'] = progress_update.time_spent_seconds

        if progress_update.steps_completed is not None:
            update_data['steps_completed'] = progress_update.steps_completed

        if progress_update.steps_skipped is not None:
            update_data['steps_skipped'] = progress_update.steps_skipped

        # Completions get a timestamp and bump the attempt counter.
        if progress_update.status == 'completed':
            update_data['completed_at'] = datetime.utcnow().isoformat() + 'Z'
            if user_progress:
                update_data['attempts'] = existing.get('attempts', 0) + 1

        success = learning_service.update_lesson_progress(
            user_id,
            progress_update.lesson_id,
            update_data
        )

        if not success:
            raise HTTPException(status_code=500, detail="Failed to save progress")

        return {
            "success": True,
            "message": "Lesson progress updated"
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating lesson progress: {e}")
        raise HTTPException(status_code=500, detail="Failed to update lesson progress")
219
-
220
-
221
@router.post("/progress/vocabulary")
async def record_vocabulary_review(
    review: VocabularyReview,
    request: Request,
    token: Optional[str] = Depends(optional_hf_token)
):
    """
    Record a vocabulary review/practice session.

    Updates review counts, optionally the mastery level, and schedules the
    next review with a simple doubling spaced-repetition interval (doubled on
    a correct answer, reset to 1 day on a miss).

    Returns:
        dict with success flag, message, and the computed next_review_at.

    Raises:
        HTTPException: 500 when the progress record cannot be loaded or saved.
    """
    try:
        user_id = token if token else 'anonymous'

        # Get current vocabulary progress
        user_progress = learning_service.get_user_progress(user_id)
        if not user_progress:
            raise HTTPException(status_code=500, detail="Failed to load user progress")

        # First-ever review of this word falls back to a fresh default record.
        # (The literal dict is evaluated per call, so it is not a shared default.)
        vocab_key = str(review.vocabulary_id)
        vocab_progress = user_progress.get('vocabulary_progress', {}).get(vocab_key, {
            'vocabulary_id': review.vocabulary_id,
            'swahili': review.swahili,
            'mastery_level': 0,
            'times_reviewed': 0,
            'times_correct': 0,
            'ease_factor': 2.5,
            'interval_days': 0
        })

        # Update review counts
        vocab_progress['times_reviewed'] = vocab_progress.get('times_reviewed', 0) + 1
        if review.is_correct:
            vocab_progress['times_correct'] = vocab_progress.get('times_correct', 0) + 1

        # Update mastery level if provided
        if review.mastery_level is not None:
            vocab_progress['mastery_level'] = review.mastery_level

        # Update timestamps
        vocab_progress['last_reviewed_at'] = datetime.utcnow().isoformat() + 'Z'

        # Calculate next review date using simple spaced repetition
        # (simplified version - could use SuperMemo SM-2 algorithm)
        interval_days = vocab_progress.get('interval_days', 0)
        if review.is_correct:
            interval_days = max(1, interval_days * 2)  # Double the interval
        else:
            interval_days = 1  # Reset to 1 day if incorrect

        vocab_progress['interval_days'] = interval_days

        from datetime import timedelta
        next_review = datetime.utcnow() + timedelta(days=interval_days)
        vocab_progress['next_review_at'] = next_review.isoformat() + 'Z'

        # Save to file
        success = learning_service.update_vocabulary_progress(
            user_id,
            review.vocabulary_id,
            vocab_progress
        )

        if not success:
            raise HTTPException(status_code=500, detail="Failed to save vocabulary progress")

        return {
            "success": True,
            "message": "Vocabulary review recorded",
            "next_review_at": vocab_progress['next_review_at']
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error recording vocabulary review: {e}")
        raise HTTPException(status_code=500, detail="Failed to record vocabulary review")
297
-
298
-
299
- # ==================== Achievement Endpoints ====================
300
-
301
@router.get("/achievements")
async def get_achievements(request: Request, token: Optional[str] = Depends(optional_hf_token)):
    """Return every achievement definition merged with the caller's progress.

    Each entry starts locked (unlocked=False, progress=0) and is overlaid with
    the user's stored state for that achievement when one exists. Responds 404
    when the achievements configuration is missing.
    """
    try:
        achievements_config = learning_service.get_achievements()
        if not achievements_config:
            raise HTTPException(status_code=404, detail="Achievements not found")

        user_id = token or 'anonymous'
        user_progress = learning_service.get_user_progress(user_id)
        user_achievements = user_progress.get('achievements', {}) if user_progress else {}

        merged_list = []
        for definition in achievements_config.get('achievements', []):
            # Start from the definition with locked defaults, then overlay
            # whatever the user has recorded for this achievement.
            merged = {**definition, 'unlocked': False, 'progress': 0}
            merged.update(user_achievements.get(definition['achievement_id'], {}))
            merged_list.append(merged)

        return {
            "success": True,
            "achievements": merged_list,
            "tiers": achievements_config.get('tiers', {})
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error fetching achievements: {e}")
        raise HTTPException(status_code=500, detail="Failed to fetch achievements")
344
-
345
-
346
@router.post("/achievements/check")
async def check_achievement(
    achievement: AchievementCheck,
    request: Request,
    token: Optional[str] = Depends(optional_hf_token)
):
    """Record achievement progress and report whether it is now unlocked.

    Persistence is delegated to the learning service; responds 500 when the
    update cannot be saved.
    """
    try:
        user_id = token or 'anonymous'
        saved = learning_service.unlock_achievement(
            user_id,
            achievement.achievement_id,
            achievement.progress,
            achievement.target
        )
        if not saved:
            raise HTTPException(status_code=500, detail="Failed to update achievement")

        return {
            "success": True,
            "unlocked": achievement.progress >= achievement.target,
            "achievement_id": achievement.achievement_id
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error checking achievement: {e}")
        raise HTTPException(status_code=500, detail="Failed to check achievement")
382
-
383
-
384
- # ==================== Statistics Endpoints ====================
385
-
386
@router.get("/stats")
async def get_user_stats(request: Request, token: Optional[str] = Depends(optional_hf_token)):
    """Return aggregated learning statistics (overall and daily) for the caller."""
    try:
        user_id = token or 'anonymous'
        record = learning_service.get_user_progress(user_id)
        if not record:
            raise HTTPException(status_code=500, detail="Failed to load user progress")

        response = {"success": True}
        response["stats"] = record.get('overall_stats', {})
        response["daily_stats"] = record.get('daily_stats', {})
        return response
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error fetching user stats: {e}")
        raise HTTPException(status_code=500, detail="Failed to fetch user stats")
410
-
411
-
412
- # ==================== TTS and ASR Endpoints ====================
413
-
414
class TTSRequest(BaseModel):
    """Request payload for generating TTS audio for a piece of lesson text."""
    text: str
    language: str
    messageId: Optional[str] = None  # used only to name the returned WAV file
418
-
419
-
420
@router.post("/tts/generate")
async def generate_tts(
    tts_request: TTSRequest,
    request: Request
):
    """
    Generate TTS audio for lesson text and return it as an inline WAV response.

    Raises:
        HTTPException: 500 on any failure, including empty synthesis output.
    """
    try:
        # Imported lazily — presumably to avoid a circular import with app.main; confirm.
        from app.main import tts_service

        # Generate TTS audio
        audio_data = await tts_service.generate_speech(
            text=tts_request.text,
            language_code=tts_request.language
        )

        if not audio_data:
            raise HTTPException(status_code=500, detail="Failed to generate TTS audio")

        # Return audio as WAV file
        return Response(
            content=audio_data,
            media_type="audio/wav",
            headers={
                "Content-Disposition": f"inline; filename=tts_{tts_request.messageId or 'audio'}.wav"
            }
        )
    except Exception as e:
        # NOTE(review): the broad except also catches the HTTPException raised
        # above and re-wraps it into a new 500 with the str() of the original.
        logger.error(f"Error generating TTS: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to generate TTS: {str(e)}")
451
-
452
-
453
- @router.post("/transcribe")
454
- async def transcribe_audio(
455
- request: Request,
456
- audio: UploadFile = File(...)
457
- ):
458
- """
459
- Transcribe audio for pronunciation practice
460
- """
461
- try:
462
- from app.main import transcription_service
463
-
464
- # Read audio file
465
- audio_bytes = await audio.read()
466
-
467
- # Get language from form data (default to Swahili)
468
- form = await request.form()
469
- language = form.get('language', 'swa')
470
-
471
- # Transcribe
472
- text = await transcription_service.transcribe_audio(
473
- audio_data=audio_bytes,
474
- language_code=language
475
- )
476
-
477
- return {
478
- "success": True,
479
- "text": text,
480
- "language": language
481
- }
482
- except Exception as e:
483
- logger.error(f"Error transcribing audio: {e}")
484
- raise HTTPException(status_code=500, detail=f"Failed to transcribe: {str(e)}")
485
-
486
-
487
- # ==================== Phase 1-3 Endpoints ====================
488
-
489
- # Vocabulary Management
490
-
491
- class VocabularyAddRequest(BaseModel):
492
- vocab_id: int
493
- source_lesson_id: Optional[int] = None
494
-
495
-
496
- class VocabularyReviewRequest(BaseModel):
497
- vocab_id: int
498
- rating: str # 'again', 'hard', 'good', 'easy'
499
-
500
-
501
- @router.get("/vocabulary/due")
502
- async def get_due_vocabulary(
503
- request: Request,
504
- token: Optional[str] = Depends(optional_hf_token)
505
- ):
506
- """Get vocabulary words due for FSRS review"""
507
- try:
508
- user_id = token if token else 'anonymous'
509
- progress = learning_service.get_user_progress(user_id)
510
-
511
- if not progress:
512
- return {"due_words": [], "total_due": 0}
513
-
514
- vocab_progress = progress.get('vocabulary_progress', {})
515
- now = datetime.utcnow()
516
- due_words = []
517
-
518
- for vocab_id, vocab_data in vocab_progress.items():
519
- next_review_str = vocab_data.get('fsrs', {}).get('next_review')
520
- if not next_review_str:
521
- continue
522
-
523
- next_review = datetime.fromisoformat(next_review_str.rstrip('Z'))
524
-
525
- if next_review <= now:
526
- hours_overdue = (now - next_review).total_seconds() / 3600
527
- vocab_data['priority'] = 1000 - hours_overdue
528
- due_words.append(vocab_data)
529
-
530
- due_words.sort(key=lambda x: x.get('priority', 0), reverse=True)
531
-
532
- return {
533
- "due_words": due_words,
534
- "total_due": len(due_words),
535
- "timestamp": now.isoformat() + 'Z'
536
- }
537
- except Exception as e:
538
- logger.error(f"Error getting due vocabulary: {e}")
539
- raise HTTPException(status_code=500, detail="Failed to get due vocabulary")
540
-
541
-
542
- @router.post("/vocabulary/add")
543
- async def add_vocabulary_to_practice(
544
- vocab_request: VocabularyAddRequest,
545
- request: Request,
546
- token: Optional[str] = Depends(optional_hf_token)
547
- ):
548
- """Add a vocabulary word to user's practice queue with FSRS initialization"""
549
- try:
550
- user_id = token if token else 'anonymous'
551
-
552
- vocab = learning_service.get_vocabulary(vocab_request.vocab_id)
553
- if not vocab:
554
- raise HTTPException(status_code=404, detail="Vocabulary not found")
555
-
556
- fsrs_data = {
557
- 'difficulty': 0.3,
558
- 'stability': 2.5,
559
- 'retrievability': 1.0,
560
- 'review_count': 0,
561
- 'last_review': None,
562
- 'next_review': datetime.utcnow().isoformat() + 'Z',
563
- 'lapses': 0,
564
- 'state': 'new'
565
- }
566
-
567
- user_vocab = {
568
- 'vocabulary_id': vocab_request.vocab_id,
569
- 'swahili': vocab.get('swahili', ''),
570
- 'english': vocab.get('english', ''),
571
- 'part_of_speech': vocab.get('part_of_speech', 'unknown'),
572
- 'added_at': datetime.utcnow().isoformat() + 'Z',
573
- 'added_from': vocab_request.source_lesson_id,
574
- 'fsrs': fsrs_data,
575
- 'mastery_level': 0,
576
- 'times_reviewed': 0,
577
- 'times_correct': 0,
578
- 'accuracy': 0.0
579
- }
580
-
581
- success = learning_service.update_vocabulary_progress(
582
- user_id, str(vocab_request.vocab_id), user_vocab
583
- )
584
-
585
- if success:
586
- return {"success": True, "vocabulary": user_vocab}
587
- else:
588
- raise HTTPException(status_code=500, detail="Failed to add vocabulary")
589
- except HTTPException:
590
- raise
591
- except Exception as e:
592
- logger.error(f"Error adding vocabulary: {e}")
593
- raise HTTPException(status_code=500, detail="Failed to add vocabulary")
594
-
595
-
596
- def calculate_next_review_fsrs(fsrs: Dict, grade: int) -> Dict:
597
- """Implement FSRS algorithm"""
598
- from datetime import timedelta
599
-
600
- difficulty = fsrs['difficulty']
601
- stability = fsrs['stability']
602
-
603
- if grade == 0:
604
- new_difficulty = min(difficulty + 0.2, 1.0)
605
- elif grade == 2:
606
- new_difficulty = min(difficulty + 0.1, 1.0)
607
- elif grade == 4:
608
- new_difficulty = max(difficulty - 0.1, 0.0)
609
- else:
610
- new_difficulty = difficulty
611
-
612
- if grade == 0:
613
- new_stability = stability * 0.5
614
- state = 'relearning'
615
- interval_minutes = 10
616
- elif grade == 2:
617
- new_stability = stability * 1.2
618
- state = 'review'
619
- interval_minutes = int(new_stability * 24 * 60)
620
- elif grade == 3:
621
- new_stability = stability * 2.5
622
- state = 'review'
623
- interval_minutes = int(new_stability * 24 * 60)
624
- else:
625
- new_stability = stability * 4.0
626
- state = 'review'
627
- interval_minutes = int(new_stability * 24 * 60)
628
-
629
- next_review = datetime.utcnow() + timedelta(minutes=interval_minutes)
630
-
631
- return {
632
- 'difficulty': new_difficulty,
633
- 'stability': new_stability,
634
- 'retrievability': 0.9 if grade >= 2 else 0.0,
635
- 'review_count': fsrs['review_count'] + 1,
636
- 'last_review': datetime.utcnow().isoformat() + 'Z',
637
- 'next_review': next_review.isoformat() + 'Z',
638
- 'lapses': fsrs['lapses'],
639
- 'state': state,
640
- 'interval_days': interval_minutes / (24 * 60)
641
- }
642
-
643
-
644
- def calculate_mastery_level(vocab: Dict) -> int:
645
- """Calculate mastery level (0-5)"""
646
- accuracy = vocab['accuracy']
647
- reviews = vocab['times_reviewed']
648
- stability = vocab['fsrs']['stability']
649
-
650
- if reviews == 0:
651
- return 0
652
- elif reviews < 5 or accuracy < 70:
653
- return 1
654
- elif reviews < 10 or accuracy < 85:
655
- return 2
656
- elif reviews < 20 or accuracy < 95:
657
- return 3
658
- elif reviews >= 20 and accuracy >= 95 and stability >= 30:
659
- return 4
660
- elif reviews >= 40 and accuracy >= 98 and stability >= 90:
661
- return 5
662
- else:
663
- return 3
664
-
665
-
666
- @router.post("/vocabulary/review")
667
- async def record_vocabulary_review_fsrs(
668
- review_request: VocabularyReviewRequest,
669
- request: Request,
670
- token: Optional[str] = Depends(optional_hf_token)
671
- ):
672
- """Record vocabulary review and update FSRS parameters"""
673
- try:
674
- user_id = token if token else 'anonymous'
675
- progress = learning_service.get_user_progress(user_id)
676
-
677
- if not progress or str(review_request.vocab_id) not in progress.get('vocabulary_progress', {}):
678
- raise HTTPException(status_code=404, detail="Vocabulary not in practice queue")
679
-
680
- vocab = progress['vocabulary_progress'][str(review_request.vocab_id)]
681
- fsrs = vocab['fsrs']
682
-
683
- grade_map = {'again': 0, 'hard': 2, 'good': 3, 'easy': 4}
684
- grade = grade_map.get(review_request.rating, 3)
685
-
686
- new_fsrs = calculate_next_review_fsrs(fsrs, grade)
687
-
688
- vocab['fsrs'] = new_fsrs
689
- vocab['times_reviewed'] += 1
690
- if grade >= 2:
691
- vocab['times_correct'] += 1
692
- else:
693
- vocab['fsrs']['lapses'] += 1
694
-
695
- vocab['accuracy'] = (vocab['times_correct'] / vocab['times_reviewed']) * 100 if vocab['times_reviewed'] > 0 else 0
696
- vocab['mastery_level'] = calculate_mastery_level(vocab)
697
- vocab['last_reviewed_at'] = datetime.utcnow().isoformat() + 'Z'
698
-
699
- if 'vocabulary_reviewed' not in progress['overall_stats']:
700
- progress['overall_stats']['vocabulary_reviewed'] = 0
701
- progress['overall_stats']['vocabulary_reviewed'] += 1
702
-
703
- learning_service.save_user_progress(user_id, progress)
704
-
705
- return {
706
- "success": True,
707
- "vocabulary": vocab,
708
- "next_review": new_fsrs['next_review'],
709
- "interval_days": new_fsrs['interval_days']
710
- }
711
- except HTTPException:
712
- raise
713
- except Exception as e:
714
- logger.error(f"Error recording vocabulary review: {e}")
715
- raise HTTPException(status_code=500, detail="Failed to record review")
716
-
717
-
718
- @router.get("/vocabulary/stats")
719
- async def get_vocabulary_stats(
720
- request: Request,
721
- token: Optional[str] = Depends(optional_hf_token)
722
- ):
723
- """Get vocabulary mastery statistics"""
724
- try:
725
- user_id = token if token else 'anonymous'
726
- progress = learning_service.get_user_progress(user_id)
727
-
728
- if not progress:
729
- return {
730
- "total_words": 0,
731
- "in_practice": 0,
732
- "mastery_breakdown": {str(i): 0 for i in range(6)},
733
- "average_accuracy": 0
734
- }
735
-
736
- vocab_progress = progress.get('vocabulary_progress', {})
737
- mastery_breakdown = {str(i): 0 for i in range(6)}
738
- total_accuracy = 0
739
- total_with_reviews = 0
740
-
741
- for vocab_data in vocab_progress.values():
742
- level = vocab_data.get('mastery_level', 0)
743
- mastery_breakdown[str(level)] += 1
744
-
745
- if vocab_data.get('times_reviewed', 0) > 0:
746
- total_accuracy += vocab_data.get('accuracy', 0)
747
- total_with_reviews += 1
748
-
749
- avg_accuracy = total_accuracy / total_with_reviews if total_with_reviews > 0 else 0
750
-
751
- return {
752
- "total_words": len(vocab_progress),
753
- "in_practice": len(vocab_progress),
754
- "mastery_breakdown": mastery_breakdown,
755
- "average_accuracy": round(avg_accuracy, 1),
756
- "total_reviews": sum(v.get('times_reviewed', 0) for v in vocab_progress.values())
757
- }
758
- except Exception as e:
759
- logger.error(f"Error getting vocabulary stats: {e}")
760
- raise HTTPException(status_code=500, detail="Failed to get stats")
761
-
762
-
763
- @router.get("/vocabulary/library")
764
- async def get_vocabulary_library(
765
- lesson_id: Optional[int] = None,
766
- level: Optional[str] = None,
767
- search: Optional[str] = None,
768
- request: Request = None,
769
- token: Optional[str] = Depends(optional_hf_token)
770
- ):
771
- """Browse all vocabulary with filters"""
772
- try:
773
- user_id = token if token else 'anonymous'
774
-
775
- all_vocab = learning_service.get_all_vocabulary()
776
- progress = learning_service.get_user_progress(user_id)
777
- user_vocab = progress.get('vocabulary_progress', {}) if progress else {}
778
-
779
- filtered_vocab = all_vocab
780
-
781
- if lesson_id:
782
- filtered_vocab = [v for v in filtered_vocab if v.get('lesson_id') == lesson_id]
783
-
784
- if level:
785
- filtered_vocab = [v for v in filtered_vocab if v.get('level') == level]
786
-
787
- if search:
788
- search_lower = search.lower()
789
- filtered_vocab = [v for v in filtered_vocab
790
- if search_lower in v.get('swahili', '').lower()
791
- or search_lower in v.get('english', '').lower()]
792
-
793
- for vocab in filtered_vocab:
794
- vocab_id = str(vocab.get('vocabulary_id') or vocab.get('id'))
795
- if vocab_id in user_vocab:
796
- vocab['status'] = 'practicing'
797
- vocab['mastery_level'] = user_vocab[vocab_id].get('mastery_level', 0)
798
- vocab['accuracy'] = user_vocab[vocab_id].get('accuracy', 0)
799
- vocab['next_review'] = user_vocab[vocab_id].get('fsrs', {}).get('next_review')
800
- else:
801
- vocab['status'] = 'not_practicing'
802
- vocab['mastery_level'] = 0
803
-
804
- return {
805
- "vocabulary": filtered_vocab,
806
- "total": len(filtered_vocab),
807
- "filters_applied": {
808
- "lesson_id": lesson_id,
809
- "level": level,
810
- "search": search
811
- }
812
- }
813
- except Exception as e:
814
- logger.error(f"Error getting vocabulary library: {e}")
815
- raise HTTPException(status_code=500, detail="Failed to get vocabulary")
816
-
817
-
818
- # Reading Comprehension
819
-
820
- class ComprehensionAnswer(BaseModel):
821
- question_id: str
822
- answer: str
823
-
824
-
825
- class ComprehensionSubmission(BaseModel):
826
- lesson_id: int
827
- passage_id: str
828
- answers: List[ComprehensionAnswer]
829
-
830
-
831
- @router.post("/comprehension/submit")
832
- async def submit_comprehension_answers(
833
- submission: ComprehensionSubmission,
834
- request: Request,
835
- token: Optional[str] = Depends(optional_hf_token)
836
- ):
837
- """Submit reading comprehension answers and get scoring"""
838
- try:
839
- user_id = token if token else 'anonymous'
840
-
841
- lesson = learning_service.get_lesson(submission.lesson_id)
842
- if not lesson:
843
- raise HTTPException(status_code=404, detail="Lesson not found")
844
-
845
- passage = None
846
- for p in lesson.get('reading_passages', []):
847
- if p['passage_id'] == submission.passage_id:
848
- passage = p
849
- break
850
-
851
- if not passage:
852
- raise HTTPException(status_code=404, detail="Passage not found")
853
-
854
- results = []
855
- correct_count = 0
856
-
857
- for submitted in submission.answers:
858
- question_id = submitted.question_id
859
- user_answer = submitted.answer.strip().lower()
860
-
861
- question = None
862
- for q in passage['comprehension_questions']:
863
- if q['question_id'] == question_id:
864
- question = q
865
- break
866
-
867
- if not question:
868
- continue
869
-
870
- correct_answers = [ans.strip().lower() for ans in question.get('correct_answers', [])]
871
- is_correct = user_answer in correct_answers
872
-
873
- if is_correct:
874
- correct_count += 1
875
-
876
- results.append({
877
- "question_id": question_id,
878
- "correct": is_correct,
879
- "user_answer": user_answer,
880
- "correct_answer": question['correct_answers'][0] if correct_answers else None,
881
- "explanation": question.get('explanation')
882
- })
883
-
884
- score = (correct_count / len(submission.answers)) * 100 if submission.answers else 0
885
-
886
- progress = learning_service.get_user_progress(user_id)
887
- if not progress:
888
- progress = learning_service.create_default_progress(user_id)
889
-
890
- if 'comprehension_scores' not in progress:
891
- progress['comprehension_scores'] = {}
892
-
893
- progress['comprehension_scores'][f"{submission.lesson_id}_{submission.passage_id}"] = {
894
- "score": score,
895
- "completed_at": datetime.utcnow().isoformat() + 'Z',
896
- "attempts": progress['comprehension_scores'].get(f"{submission.lesson_id}_{submission.passage_id}", {}).get('attempts', 0) + 1
897
- }
898
-
899
- learning_service.save_user_progress(user_id, progress)
900
-
901
- return {
902
- "results": results,
903
- "score": round(score, 1),
904
- "correct": correct_count,
905
- "total": len(submission.answers)
906
- }
907
- except HTTPException:
908
- raise
909
- except Exception as e:
910
- logger.error(f"Error submitting comprehension: {e}")
911
- raise HTTPException(status_code=500, detail="Failed to submit comprehension")
912
-
913
-
914
- # Task Scenarios
915
-
916
- class ScenarioProgressUpdate(BaseModel):
917
- turn_id: str
918
- choice_id: str
919
-
920
-
921
- @router.get("/scenarios/{scenario_id}")
922
- async def get_scenario(
923
- scenario_id: str,
924
- request: Request,
925
- token: Optional[str] = Depends(optional_hf_token)
926
- ):
927
- """Get task scenario with branching dialogue"""
928
- try:
929
- user_id = token if token else 'anonymous'
930
-
931
- scenario = learning_service.get_scenario(scenario_id)
932
- if not scenario:
933
- raise HTTPException(status_code=404, detail="Scenario not found")
934
-
935
- progress = learning_service.get_user_progress(user_id)
936
- scenario_progress = None
937
-
938
- if progress and 'scenario_progress' in progress:
939
- scenario_progress = progress['scenario_progress'].get(scenario_id)
940
-
941
- return {
942
- "scenario": scenario,
943
- "user_progress": scenario_progress
944
- }
945
- except HTTPException:
946
- raise
947
- except Exception as e:
948
- logger.error(f"Error getting scenario: {e}")
949
- raise HTTPException(status_code=500, detail="Failed to get scenario")
950
-
951
-
952
- @router.post("/scenarios/{scenario_id}/progress")
953
- async def update_scenario_progress(
954
- scenario_id: str,
955
- progress_update: ScenarioProgressUpdate,
956
- request: Request,
957
- token: Optional[str] = Depends(optional_hf_token)
958
- ):
959
- """Update scenario progress with user choice"""
960
- try:
961
- user_id = token if token else 'anonymous'
962
-
963
- scenario = learning_service.get_scenario(scenario_id)
964
- if not scenario:
965
- raise HTTPException(status_code=404, detail="Scenario not found")
966
-
967
- progress = learning_service.get_user_progress(user_id)
968
- if not progress:
969
- progress = learning_service.create_default_progress(user_id)
970
-
971
- if 'scenario_progress' not in progress:
972
- progress['scenario_progress'] = {}
973
-
974
- if scenario_id not in progress['scenario_progress']:
975
- progress['scenario_progress'][scenario_id] = {
976
- "started_at": datetime.utcnow().isoformat() + 'Z',
977
- "turns": [],
978
- "completed": False
979
- }
980
-
981
- progress['scenario_progress'][scenario_id]['turns'].append({
982
- "turn_id": progress_update.turn_id,
983
- "choice_id": progress_update.choice_id,
984
- "timestamp": datetime.utcnow().isoformat() + 'Z'
985
- })
986
-
987
- turns_count = len(progress['scenario_progress'][scenario_id]['turns'])
988
- if turns_count >= scenario.get('required_turns', 6):
989
- progress['scenario_progress'][scenario_id]['completed'] = True
990
- progress['scenario_progress'][scenario_id]['completed_at'] = datetime.utcnow().isoformat() + 'Z'
991
-
992
- learning_service.save_user_progress(user_id, progress)
993
-
994
- return {
995
- "success": True,
996
- "progress": progress['scenario_progress'][scenario_id]
997
- }
998
- except HTTPException:
999
- raise
1000
- except Exception as e:
1001
- logger.error(f"Error updating scenario progress: {e}")
1002
- raise HTTPException(status_code=500, detail="Failed to update scenario")
1003
-
1004
-
1005
- @router.get("/scenarios")
1006
- async def list_scenarios(
1007
- request: Request,
1008
- token: Optional[str] = Depends(optional_hf_token)
1009
- ):
1010
- """Get list of all available scenarios"""
1011
- try:
1012
- scenarios = learning_service.get_all_scenarios()
1013
- return {
1014
- "success": True,
1015
- "scenarios": scenarios,
1016
- "total": len(scenarios)
1017
- }
1018
- except Exception as e:
1019
- logger.error(f"Error listing scenarios: {e}")
1020
- raise HTTPException(status_code=500, detail="Failed to list scenarios")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/routers/mobile.py DELETED
@@ -1,536 +0,0 @@
1
- from fastapi import APIRouter, HTTPException, File, UploadFile, Form, Query, Depends
2
- from fastapi.responses import Response
3
- from typing import Optional
4
- from pydantic import BaseModel
5
- import base64
6
- import json
7
- import uuid
8
- import datetime
9
- from app.services.translation_service import TranslationService
10
- from app.services.tts_service import TTSService
11
- from app.services.transcription_service import TranscriptionService
12
- from app.auth import require_hf_token
13
-
14
- router = APIRouter()
15
-
16
- # Service instances - these will be injected by main app
17
- translation_service = None
18
- tts_service = None
19
- transcription_service = None
20
-
21
- # Mobile-specific data models
22
- class MobileSessionRequest(BaseModel):
23
- user_name: str
24
- default_source_lang: str = "eng"
25
- default_target_lang: str = "swa"
26
-
27
- class MobileSessionResponse(BaseModel):
28
- session_id: str
29
- participant_id: str
30
- user_name: str
31
- source_language: str
32
- target_language: str
33
-
34
- class MobileTranscribeRequest(BaseModel):
35
- participant_id: str
36
- source_language: str
37
- target_language: str
38
- is_final_chunk: bool = False
39
-
40
- class MobileLanguageUpdateRequest(BaseModel):
41
- participant_id: str
42
- source_language: str
43
- target_language: str
44
-
45
- # In-memory session storage (in production, use Redis or database)
46
- mobile_sessions = {}
47
-
48
- @router.post("/mobile/session/create", response_model=MobileSessionResponse)
49
- async def create_mobile_session(
50
- user_name: str = Form(...),
51
- default_source_lang: str = Form("eng"),
52
- default_target_lang: str = Form("swa"),
53
- token: str = Depends(require_hf_token)
54
- ):
55
- """Create a mobile-specific single-user session"""
56
- try:
57
- print(f"=== MOBILE SESSION CREATE REQUEST ===")
58
- print(f"User name: {user_name}")
59
- print(f"Source language: {default_source_lang}")
60
- print(f"Target language: {default_target_lang}")
61
-
62
- # Validate inputs
63
- if not user_name or user_name.strip() == "":
64
- raise HTTPException(status_code=400, detail="User name is required")
65
-
66
- # Validate language codes
67
- valid_languages = ["eng", "swa", "kik", "kam", "mer", "luo", "som"]
68
- if default_source_lang not in valid_languages:
69
- print(f"Invalid source language: {default_source_lang}, defaulting to 'eng'")
70
- default_source_lang = "eng"
71
- if default_target_lang not in valid_languages:
72
- print(f"Invalid target language: {default_target_lang}, defaulting to 'swa'")
73
- default_target_lang = "swa"
74
-
75
- session_id = f"mobile-{uuid.uuid4().hex[:8]}"
76
- participant_id = f"user-{uuid.uuid4().hex[:8]}"
77
-
78
- # Store session data
79
- mobile_sessions[session_id] = {
80
- "session_id": session_id,
81
- "participant_id": participant_id,
82
- "user_name": user_name.strip(),
83
- "source_language": default_source_lang,
84
- "target_language": default_target_lang,
85
- "created_at": datetime.datetime.now().isoformat()
86
- }
87
-
88
- print(f"Created session: {session_id} for user: {user_name}")
89
- print(f"Total sessions: {len(mobile_sessions)}")
90
-
91
- response = MobileSessionResponse(
92
- session_id=session_id,
93
- participant_id=participant_id,
94
- user_name=user_name.strip(),
95
- source_language=default_source_lang,
96
- target_language=default_target_lang
97
- )
98
-
99
- print(f"Returning response: {response}")
100
- return response
101
-
102
- except HTTPException:
103
- raise
104
- except Exception as e:
105
- print(f"ERROR creating mobile session: {e}")
106
- import traceback
107
- traceback.print_exc()
108
- raise HTTPException(status_code=500, detail=f"Failed to create mobile session: {str(e)}")
109
-
110
- @router.get("/mobile/session/{session_id}")
111
- async def get_mobile_session(session_id: str, token: str = Depends(require_hf_token)):
112
- """Get mobile session details"""
113
- if session_id not in mobile_sessions:
114
- raise HTTPException(status_code=404, detail="Session not found")
115
-
116
- return mobile_sessions[session_id]
117
-
118
- @router.put("/mobile/session/{session_id}/languages")
119
- async def update_session_languages(
120
- session_id: str,
121
- participant_id: str = Form(...),
122
- source_language: str = Form(...),
123
- target_language: str = Form(...)
124
- ):
125
- """Update the default languages for a mobile session"""
126
- try:
127
- if session_id not in mobile_sessions:
128
- raise HTTPException(status_code=404, detail="Session not found")
129
-
130
- session = mobile_sessions[session_id]
131
- if session["participant_id"] != participant_id:
132
- raise HTTPException(status_code=403, detail="Invalid participant")
133
-
134
- # Update session languages
135
- session["source_language"] = source_language
136
- session["target_language"] = target_language
137
-
138
- return {
139
- "success": True,
140
- "session_id": session_id,
141
- "source_language": source_language,
142
- "target_language": target_language
143
- }
144
-
145
- except Exception as e:
146
- raise HTTPException(status_code=500, detail=f"Failed to update languages: {str(e)}")
147
-
148
- @router.post("/mobile/session/{session_id}/transcribe-realtime")
149
- async def transcribe_realtime(
150
- session_id: str,
151
- audio: UploadFile = File(...),
152
- participant_id: str = Form(...),
153
- source_language: str = Form(...),
154
- target_language: str = Form(...),
155
- is_final_chunk: bool = Form(False),
156
- chunk_sequence: int = Form(0)
157
- ):
158
- """Real-time transcription endpoint for mobile with streaming support"""
159
- try:
160
- if session_id not in mobile_sessions:
161
- raise HTTPException(status_code=404, detail="Session not found")
162
-
163
- session = mobile_sessions[session_id]
164
- if session["participant_id"] != participant_id:
165
- raise HTTPException(status_code=403, detail="Invalid participant")
166
-
167
- # Read audio file
168
- audio_data = await audio.read()
169
-
170
- # Generate unique message ID for this chunk sequence
171
- message_id = f"msg-{participant_id}-{chunk_sequence}"
172
-
173
- # Initialize response data
174
- response_data = {
175
- "success": True,
176
- "message_id": message_id,
177
- "chunk_sequence": chunk_sequence,
178
- "original_text": "",
179
- "original_language": source_language,
180
- "is_final_chunk": is_final_chunk,
181
- "is_interim": not is_final_chunk,
182
- "session_id": session_id,
183
- "translated_text": None,
184
- "target_language": target_language,
185
- "has_audio": False,
186
- "audio_base64": None,
187
- "audio_format": None
188
- }
189
-
190
- # Process transcription
191
- if transcription_service:
192
- try:
193
- # Use streaming transcription if available
194
- if hasattr(transcription_service, 'process_realtime_chunk'):
195
- transcription_result = await transcription_service.process_realtime_chunk(
196
- audio_data, source_language, participant_id, is_final_chunk
197
- )
198
- else:
199
- # Fallback to regular transcription
200
- transcription_result = await transcription_service.transcribe_audio(
201
- audio_data, source_language
202
- )
203
-
204
- response_data["original_text"] = transcription_result or ""
205
-
206
- # Only process translation and TTS for final chunks with actual text
207
- if is_final_chunk and transcription_result and transcription_result.strip():
208
- if translation_service:
209
- try:
210
- translated_text = await translation_service.translate_text(
211
- transcription_result, source_language, target_language
212
- )
213
- response_data["translated_text"] = translated_text
214
-
215
- # Generate TTS audio in target language
216
- if tts_service and translated_text:
217
- try:
218
- tts_audio = await tts_service.generate_speech(
219
- translated_text, target_language, output_format="wav"
220
- )
221
-
222
- if tts_audio:
223
- response_data.update({
224
- "has_audio": True,
225
- "audio_base64": base64.b64encode(tts_audio).decode('utf-8'),
226
- "audio_format": "wav"
227
- })
228
- except Exception as tts_error:
229
- print(f"TTS generation failed: {tts_error}")
230
- # Continue without TTS
231
-
232
- except Exception as translation_error:
233
- print(f"Translation failed: {translation_error}")
234
- # Continue without translation
235
-
236
- except Exception as transcription_error:
237
- print(f"Transcription failed: {transcription_error}")
238
- response_data["original_text"] = ""
239
-
240
- return response_data
241
-
242
- else:
243
- raise HTTPException(status_code=500, detail="Transcription service not available")
244
-
245
- except Exception as e:
246
- raise HTTPException(status_code=500, detail=f"Real-time transcription failed: {str(e)}")
247
-
248
- @router.post("/mobile/session/{session_id}/stream-audio")
249
- async def stream_audio_chunk(
250
- session_id: str,
251
- participant_id: str = Form(...),
252
- audio_chunk: UploadFile = File(...),
253
- source_language: str = Form(...),
254
- target_language: str = Form(...),
255
- chunk_index: int = Form(0),
256
- is_speaking: bool = Form(True),
257
- force_complete: bool = Form(False)
258
- ):
259
- """Stream audio chunks for continuous processing"""
260
- try:
261
- if session_id not in mobile_sessions:
262
- raise HTTPException(status_code=404, detail="Session not found")
263
-
264
- session = mobile_sessions[session_id]
265
- if session["participant_id"] != participant_id:
266
- raise HTTPException(status_code=403, detail="Invalid participant")
267
-
268
- audio_data = await audio_chunk.read()
269
-
270
- # Use streaming approach similar to WebSocket
271
- interim_text = ""
272
- if transcription_service:
273
- try:
274
- if hasattr(transcription_service, 'process_audio_chunk'):
275
- result = await transcription_service.process_audio_chunk(
276
- audio_data,
277
- source_language,
278
- participant_id,
279
- has_voice_activity=is_speaking,
280
- progress_callback=None, # No callback for HTTP
281
- sentence_callback=None # No callback for HTTP
282
- )
283
- interim_text = result or ""
284
- else:
285
- # Fallback to regular transcription for interim results
286
- interim_text = await transcription_service.transcribe_audio(
287
- audio_data, source_language
288
- ) or ""
289
- except Exception as e:
290
- print(f"Streaming transcription error: {e}")
291
- interim_text = ""
292
-
293
- return {
294
- "success": True,
295
- "chunk_index": chunk_index,
296
- "session_id": session_id,
297
- "interim_text": interim_text,
298
- "is_speaking": is_speaking,
299
- "force_complete": force_complete
300
- }
301
- else:
302
- raise HTTPException(status_code=500, detail="Transcription service not available")
303
-
304
- except Exception as e:
305
- raise HTTPException(status_code=500, detail=f"Audio streaming failed: {str(e)}")
306
-
307
- @router.get("/mobile/session/{session_id}/realtime-status")
308
- async def get_realtime_status(session_id: str, participant_id: str = Query(...)):
309
- """Get current real-time processing status"""
310
- try:
311
- if session_id not in mobile_sessions:
312
- raise HTTPException(status_code=404, detail="Session not found")
313
-
314
- session = mobile_sessions[session_id]
315
- if session["participant_id"] != participant_id:
316
- raise HTTPException(status_code=403, detail="Invalid participant")
317
-
318
- # Check if transcription service has any pending messages
319
- pending_messages = []
320
- if transcription_service:
321
- try:
322
- if hasattr(transcription_service, 'get_participant_status'):
323
- pending_messages = transcription_service.get_participant_status(participant_id)
324
- else:
325
- pending_messages = []
326
- except Exception as e:
327
- print(f"Error getting participant status: {e}")
328
- pending_messages = []
329
-
330
- return {
331
- "session_id": session_id,
332
- "participant_id": participant_id,
333
- "is_active": True,
334
- "pending_messages": pending_messages,
335
- "current_languages": {
336
- "source": session["source_language"],
337
- "target": session["target_language"]
338
- },
339
- "service_status": {
340
- "transcription": transcription_service is not None,
341
- "translation": translation_service is not None,
342
- "tts": tts_service is not None
343
- }
344
- }
345
-
346
- except Exception as e:
347
- raise HTTPException(status_code=500, detail=f"Status check failed: {str(e)}")
348
-
349
- @router.post("/mobile/session/{session_id}/transcribe-with-languages")
350
- async def transcribe_with_languages_legacy(
351
- session_id: str,
352
- audio: UploadFile = File(...),
353
- participant_id: str = Form(...),
354
- source_language: str = Form(...),
355
- target_language: str = Form(...),
356
- is_final_chunk: bool = Form(False)
357
- ):
358
- """Legacy endpoint - transcribe audio with specific source/target languages for mobile"""
359
- try:
360
- if session_id not in mobile_sessions:
361
- raise HTTPException(status_code=404, detail="Session not found")
362
-
363
- session = mobile_sessions[session_id]
364
- if session["participant_id"] != participant_id:
365
- raise HTTPException(status_code=403, detail="Invalid participant")
366
-
367
- # Read audio file
368
- audio_data = await audio.read()
369
-
370
- # Generate unique message ID
371
- message_id = f"msg-{uuid.uuid4().hex[:8]}"
372
-
373
- # Initialize response
374
- response_data = {
375
- "success": True,
376
- "message_id": message_id,
377
- "original_text": "",
378
- "original_language": source_language,
379
- "translated_text": None,
380
- "target_language": target_language,
381
- "has_audio": False,
382
- "is_final_chunk": is_final_chunk,
383
- "audio_base64": None
384
- }
385
-
386
- # Process transcription in source language
387
- if transcription_service:
388
- try:
389
- transcription_result = await transcription_service.transcribe_audio(
390
- audio_data, source_language
391
- )
392
- response_data["original_text"] = transcription_result or ""
393
-
394
- # Process translation to target language
395
- if translation_service and transcription_result and transcription_result.strip():
396
- try:
397
- translated_text = await translation_service.translate_text(
398
- transcription_result, source_language, target_language
399
- )
400
- response_data["translated_text"] = translated_text
401
-
402
- # Generate TTS audio in target language
403
- if tts_service and translated_text:
404
- try:
405
- tts_audio = await tts_service.generate_speech(
406
- translated_text, target_language, output_format="wav"
407
- )
408
-
409
- if tts_audio:
410
- response_data["has_audio"] = True
411
- response_data["audio_base64"] = base64.b64encode(tts_audio).decode('utf-8')
412
- except Exception as tts_error:
413
- print(f"TTS generation failed: {tts_error}")
414
-
415
- except Exception as translation_error:
416
- print(f"Translation failed: {translation_error}")
417
-
418
- except Exception as transcription_error:
419
- print(f"Transcription failed: {transcription_error}")
420
-
421
- return response_data
422
- else:
423
- raise HTTPException(status_code=500, detail="Transcription service not available")
424
-
425
- except Exception as e:
426
- raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
427
-
428
- @router.post("/mobile/translate")
429
- async def translate_text_mobile(
430
- text: str = Form(...),
431
- source_lang: str = Form(...),
432
- target_lang: str = Form(...)
433
- ):
434
- """Mobile-friendly text translation endpoint"""
435
- try:
436
- if not translation_service:
437
- raise HTTPException(status_code=500, detail="Translation service not initialized")
438
-
439
- # Map common language codes to internal format
440
- lang_mapping = {
441
- "english": "eng", "en": "eng",
442
- "swahili": "swa", "sw": "swa",
443
- "kikuyu": "kik", "ki": "kik",
444
- "kamba": "kam", "kam": "kam",
445
- "kimeru": "mer", "mer": "mer",
446
- "luo": "luo", "luo": "luo",
447
- "somali": "som", "so": "som"
448
- }
449
-
450
- source_code = lang_mapping.get(source_lang.lower(), source_lang.lower())
451
- target_code = lang_mapping.get(target_lang.lower(), target_lang.lower())
452
-
453
- translated_text = await translation_service.translate_text(text, source_code, target_code)
454
-
455
- return {
456
- "success": True,
457
- "original_text": text,
458
- "translated_text": translated_text or text,
459
- "source_language": source_code,
460
- "target_language": target_code
461
- }
462
-
463
- except Exception as e:
464
- raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
465
-
466
- @router.get("/mobile/languages")
467
- async def get_supported_languages():
468
- """Get list of supported languages for mobile app"""
469
- return {
470
- "supported_languages": [
471
- {"code": "eng", "name": "English", "display_name": "English (eng)"},
472
- {"code": "swa", "name": "Swahili", "display_name": "Swahili (swa)"},
473
- {"code": "kik", "name": "Kikuyu", "display_name": "Kikuyu (kik)"},
474
- {"code": "kam", "name": "Kamba", "display_name": "Kamba (kam)"},
475
- {"code": "mer", "name": "Kimeru", "display_name": "Kimeru (mer)"},
476
- {"code": "luo", "name": "Luo", "display_name": "Luo (luo)"},
477
- {"code": "som", "name": "Somali", "display_name": "Somali (som)"}
478
- ]
479
- }
480
-
481
- @router.get("/mobile/test")
482
- async def test_mobile_endpoints():
483
- """Test endpoint for mobile app connectivity"""
484
- return {
485
- "status": "Mobile API is working",
486
- "endpoints": [
487
- "/mobile/session/create",
488
- "/mobile/session/{session_id}",
489
- "/mobile/session/{session_id}/languages",
490
- "/mobile/session/{session_id}/transcribe-realtime",
491
- "/mobile/session/{session_id}/stream-audio",
492
- "/mobile/session/{session_id}/realtime-status",
493
- "/mobile/session/{session_id}/transcribe-with-languages",
494
- "/mobile/translate",
495
- "/mobile/languages",
496
- "/mobile/test"
497
- ],
498
- "timestamp": datetime.datetime.now().isoformat(),
499
- "services_available": {
500
- "transcription": transcription_service is not None,
501
- "translation": translation_service is not None,
502
- "tts": tts_service is not None
503
- },
504
- "active_sessions": len(mobile_sessions),
505
- "session_list": list(mobile_sessions.keys())
506
- }
507
-
508
- @router.post("/mobile/test-session")
509
- async def test_session_creation(
510
- test_user: str = Form("TestUser"),
511
- test_source: str = Form("eng"),
512
- test_target: str = Form("swa")
513
- ):
514
- """Test session creation with debug info"""
515
- try:
516
- print(f"=== TEST SESSION CREATE ===")
517
- print(f"Received: user={test_user}, source={test_source}, target={test_target}")
518
-
519
- session_id = f"test-{uuid.uuid4().hex[:8]}"
520
-
521
- return {
522
- "success": True,
523
- "test_session_id": session_id,
524
- "received_params": {
525
- "user": test_user,
526
- "source": test_source,
527
- "target": test_target
528
- },
529
- "form_processing": "OK"
530
- }
531
- except Exception as e:
532
- print(f"Test session error: {e}")
533
- return {
534
- "success": False,
535
- "error": str(e)
536
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/routers/sessions.py DELETED
@@ -1,200 +0,0 @@
1
- from fastapi import APIRouter, HTTPException, Response, Depends
2
- from typing import List
3
- from pydantic import BaseModel
4
- import qrcode
5
- import io
6
- from app.models import Session, SessionCreate
7
- from app.auth import require_hf_token, optional_hf_token
8
-
9
- router = APIRouter()
10
-
11
- # This will be set by the main app
12
- session_manager = None
13
-
14
- # Initialize services (these will be injected by main app)
15
- transcription_service = None
16
- translation_service = None
17
- tts_service = None
18
-
19
- class TextTranslationRequest(BaseModel):
20
- text: str
21
- source_language: str
22
- target_language: str
23
-
24
- class TextTranslationResponse(BaseModel):
25
- original_text: str
26
- translated_text: str
27
- source_language: str
28
- target_language: str
29
-
30
- @router.post("/sessions", response_model=Session)
31
- async def create_session(session_data: SessionCreate, token: str = Depends(require_hf_token)):
32
- """Create a new transcription session"""
33
- try:
34
- session = await session_manager.create_session(session_data)
35
- return session
36
- except Exception as e:
37
- raise HTTPException(status_code=500, detail=str(e))
38
-
39
- @router.get("/sessions", response_model=List[Session])
40
- async def get_all_sessions(token: str = Depends(require_hf_token)):
41
- """Get all active sessions"""
42
- try:
43
- sessions = await session_manager.get_all_sessions()
44
- return sessions
45
- except Exception as e:
46
- raise HTTPException(status_code=500, detail=str(e))
47
-
48
- @router.get("/sessions/{session_id}", response_model=Session)
49
- async def get_session(session_id: str, token: str = Depends(require_hf_token)):
50
- """Get specific session by ID or short code"""
51
- session = await session_manager.get_session(session_id)
52
- if not session:
53
- raise HTTPException(status_code=404, detail="Session not found")
54
- return session
55
-
56
- @router.get("/sessions/{session_id}/short-code")
57
- async def get_session_short_code(session_id: str, token: str = Depends(require_hf_token)):
58
- """Get short code for a session"""
59
- session = await session_manager.get_session(session_id)
60
- if not session:
61
- raise HTTPException(status_code=404, detail="Session not found")
62
-
63
- short_code = session_manager.get_short_code(session.id)
64
- return {"session_id": session.id, "short_code": short_code}
65
-
66
- @router.delete("/sessions/{session_id}")
67
- async def delete_session(session_id: str, token: str = Depends(require_hf_token)):
68
- """Delete a session"""
69
- success = await session_manager.delete_session(session_id)
70
- if not success:
71
- raise HTTPException(status_code=404, detail="Session not found")
72
- return {"message": "Session deleted successfully"}
73
-
74
- @router.post("/sessions/{session_id}/languages/{language_code}")
75
- async def add_language_to_session(session_id: str, language_code: str, token: str = Depends(require_hf_token)):
76
- """Add a language to a session"""
77
- from app.models import LanguageCode
78
-
79
- # Convert string to LanguageCode enum
80
- try:
81
- lang_code_enum = LanguageCode(language_code)
82
- except ValueError:
83
- raise HTTPException(status_code=400, detail=f"Invalid language code: {language_code}")
84
-
85
- success = await session_manager.add_language_to_session(session_id, lang_code_enum)
86
- if success:
87
- session = await session_manager.get_session(session_id)
88
- return {"message": f"Language {language_code} added to session", "session": session}
89
- else:
90
- # Check if session exists
91
- session = await session_manager.get_session(session_id)
92
- if not session:
93
- raise HTTPException(status_code=404, detail="Session not found")
94
- return {"message": f"Language {language_code} already exists in session", "session": session}
95
-
96
- @router.post("/translate", response_model=TextTranslationResponse)
97
- async def translate_text(request: TextTranslationRequest, token: str = Depends(require_hf_token)):
98
- """Translate text from source language to target language"""
99
- try:
100
- # Map language codes to proper names
101
- lang_map = {
102
- 'eng': 'English',
103
- 'swa': 'Swahili',
104
- 'kik': 'Kikuyu',
105
- 'kam': 'Kamba',
106
- 'mer': 'Kimeru',
107
- 'luo': 'Luo',
108
- 'som': 'Somali'
109
- }
110
-
111
- source_lang_name = lang_map.get(request.source_language.lower(), request.source_language)
112
- target_lang_name = lang_map.get(request.target_language.lower(), request.target_language)
113
-
114
- # Perform translation
115
- translated_text = await translation_service.translate_text(
116
- text=request.text,
117
- source_lang=source_lang_name,
118
- target_lang=target_lang_name
119
- )
120
-
121
- return TextTranslationResponse(
122
- original_text=request.text,
123
- translated_text=translated_text,
124
- source_language=request.source_language,
125
- target_language=request.target_language
126
- )
127
-
128
- except Exception as e:
129
- raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
130
-
131
- @router.get("/test")
132
- async def test_endpoint(token: str = Depends(optional_hf_token)):
133
- """Test endpoint to verify API is working"""
134
- auth_status = "authenticated" if token else "public"
135
- return {
136
- "status": "API is working",
137
- "sessions_count": len(session_manager.sessions),
138
- "auth_status": auth_status
139
- }
140
-
141
- @router.get("/test/translation")
142
- async def test_translation(token: str = Depends(require_hf_token)):
143
- """Test translation service directly"""
144
- try:
145
- # Test English to Swahili translation
146
- result = await translation_service.translate_text("Hello, how are you?", "English", "Swahili")
147
-
148
- return {
149
- "status": "Translation test completed",
150
- "original": "Hello, how are you?",
151
- "translated": result,
152
- "source_lang": "English",
153
- "target_lang": "Swahili"
154
- }
155
- except Exception as e:
156
- return {"status": "Translation test failed", "error": str(e)}
157
-
158
- @router.get("/test/tts")
159
- async def test_tts(token: str = Depends(require_hf_token)):
160
- """Test TTS service directly"""
161
- try:
162
- # Test TTS generation
163
- audio_data = await tts_service.generate_speech("Hello world", "eng")
164
-
165
- return {
166
- "status": "TTS test completed",
167
- "text": "Hello world",
168
- "language": "eng",
169
- "audio_generated": audio_data is not None,
170
- "audio_size": len(audio_data) if audio_data else 0
171
- }
172
- except Exception as e:
173
- return {"status": "TTS test failed", "error": str(e)}
174
-
175
- @router.get("/sessions/{session_id}/qr-code")
176
- async def get_session_qr_code(session_id: str, token: str = Depends(require_hf_token)):
177
- """Generate QR code for session"""
178
- if session_manager is None:
179
- raise HTTPException(status_code=500, detail="Session manager not initialized")
180
-
181
- session = await session_manager.get_session(session_id)
182
-
183
- if not session:
184
- raise HTTPException(status_code=404, detail="Session not found")
185
-
186
- # Generate QR code with session join URL - use your HF space URL
187
- join_url = f"https://mutisya-realtime-translator-5-27-25-v2.hf.space/?join={session_id}"
188
-
189
- qr = qrcode.QRCode(version=1, box_size=10, border=5)
190
- qr.add_data(join_url)
191
- qr.make(fit=True)
192
-
193
- img = qr.make_image(fill_color="black", back_color="white")
194
-
195
- # Convert to bytes
196
- img_buffer = io.BytesIO()
197
- img.save(img_buffer, format='PNG')
198
- img_buffer.seek(0)
199
-
200
- return Response(content=img_buffer.getvalue(), media_type="image/png")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/routers/watch.py DELETED
@@ -1,152 +0,0 @@
1
- from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Depends
2
- from fastapi.responses import Response
3
- from typing import Optional
4
- import io
5
- import base64
6
- from app.services.transcription_service import TranscriptionService
7
- from app.services.translation_service import TranslationService
8
- from app.services.tts_service import TTSService
9
- from app.models import LanguageCode
10
- from pydantic import BaseModel
11
- from app.auth import require_hf_token
12
-
13
- router = APIRouter()
14
-
15
- class WatchTranslationRequest(BaseModel):
16
- source_language: str
17
- target_language: str
18
- audio_base64: str
19
-
20
- class WatchTranslationResponse(BaseModel):
21
- original_text: str
22
- original_language: str
23
- translated_text: str
24
- target_language: str
25
- translated_audio_base64: str
26
- success: bool
27
- error: Optional[str] = None
28
-
29
- # Initialize services (these will be injected by main app)
30
- transcription_service = None
31
- translation_service = None
32
- tts_service = None
33
-
34
- @router.post("/watch/translate", response_model=WatchTranslationResponse)
35
- async def watch_translate_audio(request: WatchTranslationRequest, token: str = Depends(require_hf_token)):
36
- """
37
- Process audio for watch app translation
38
- - Transcribe audio using source language model
39
- - Translate text to target language
40
- - Generate TTS audio for target language
41
- - Return all data to watch app
42
- """
43
- try:
44
- # Validate languages
45
- source_lang = request.source_language.lower()
46
- target_lang = request.target_language.lower()
47
-
48
- if source_lang not in ['eng', 'swa', 'kik', 'kam', 'mer', 'luo', 'som']:
49
- raise HTTPException(status_code=400, detail=f"Unsupported source language: {source_lang}")
50
-
51
- if target_lang not in ['eng', 'swa', 'kik', 'kam', 'mer', 'luo', 'som']:
52
- raise HTTPException(status_code=400, detail=f"Unsupported target language: {target_lang}")
53
-
54
- # Decode base64 audio
55
- try:
56
- audio_data = base64.b64decode(request.audio_base64)
57
- print(f"Decoded audio data: {len(audio_data)} bytes")
58
- except Exception as e:
59
- raise HTTPException(status_code=400, detail=f"Invalid base64 audio data: {str(e)}")
60
-
61
- # Step 1: Transcribe audio
62
- print(f"Transcribing audio with {source_lang} model...")
63
- transcribed_text = await transcription_service.transcribe_audio(audio_data, source_lang)
64
-
65
- if not transcribed_text or transcribed_text.strip() == "":
66
- return WatchTranslationResponse(
67
- original_text="",
68
- original_language=source_lang,
69
- translated_text="No speech detected",
70
- target_language=target_lang,
71
- translated_audio_base64="",
72
- success=False,
73
- error="No speech detected in audio"
74
- )
75
-
76
- print(f"Transcribed text: {transcribed_text}")
77
-
78
- # Step 2: Translate text (skip if source and target are the same)
79
- if source_lang == target_lang:
80
- translated_text = transcribed_text
81
- else:
82
- print(f"Translating from {source_lang} to {target_lang}...")
83
-
84
- # Convert language codes to full names for translation service
85
- lang_name_map = {
86
- 'eng': 'English',
87
- 'swa': 'Swahili',
88
- 'kik': 'Kikuyu',
89
- 'kam': 'Kamba',
90
- 'mer': 'Kimeru',
91
- 'luo': 'Luo',
92
- 'som': 'Somali'
93
- }
94
-
95
- source_lang_name = lang_name_map.get(source_lang, 'English')
96
- target_lang_name = lang_name_map.get(target_lang, 'Swahili')
97
-
98
- translated_text = await translation_service.translate_text(
99
- transcribed_text,
100
- source_lang_name,
101
- target_lang_name
102
- )
103
-
104
- print(f"Translated text: {translated_text}")
105
-
106
- # Step 3: Generate TTS audio for translated text (Android-compatible WAV format)
107
- print(f"Generating TTS audio for {target_lang} in WAV format for Android...")
108
- tts_audio_data = await tts_service.generate_speech(translated_text, target_lang, output_format="wav")
109
-
110
- # Encode TTS audio as base64
111
- tts_audio_base64 = ""
112
- if tts_audio_data:
113
- tts_audio_base64 = base64.b64encode(tts_audio_data).decode('utf-8')
114
- print(f"TTS audio generated: {len(tts_audio_data)} bytes, base64: {len(tts_audio_base64)} chars")
115
- else:
116
- print("TTS audio generation failed - no data returned")
117
-
118
- return WatchTranslationResponse(
119
- original_text=transcribed_text,
120
- original_language=source_lang,
121
- translated_text=translated_text,
122
- target_language=target_lang,
123
- translated_audio_base64=tts_audio_base64,
124
- success=True
125
- )
126
-
127
- except Exception as e:
128
- print(f"Error in watch translation: {str(e)}")
129
- import traceback
130
- traceback.print_exc()
131
-
132
- return WatchTranslationResponse(
133
- original_text="",
134
- original_language=request.source_language,
135
- translated_text="",
136
- target_language=request.target_language,
137
- translated_audio_base64="",
138
- success=False,
139
- error=str(e)
140
- )
141
-
142
- @router.get("/watch/test")
143
- async def test_watch_endpoint(token: str = Depends(require_hf_token)):
144
- """Test endpoint for watch app connectivity"""
145
- return {
146
- "status": "Watch API is working",
147
- "services": {
148
- "transcription": transcription_service is not None,
149
- "translation": translation_service is not None,
150
- "tts": tts_service is not None
151
- }
152
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/__init__.py DELETED
@@ -1 +0,0 @@
1
- # Services package
 
 
app/services/learning_data_service.py DELETED
@@ -1,415 +0,0 @@
1
- """
2
- Learning Data Service - File-based data access for language learning prototype
3
-
4
- This service provides access to lesson data, user progress, and achievements
5
- using JSON files stored in the backend/data/learning directory.
6
- """
7
-
8
- import json
9
- import os
10
- from pathlib import Path
11
- from typing import Dict, List, Optional, Any
12
- from datetime import datetime
13
- import logging
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
-
18
- class LearningDataService:
19
- """Service for managing language learning data using JSON files"""
20
-
21
- def __init__(self):
22
- # Get the data directory relative to this file
23
- self.data_dir = Path(__file__).parent.parent.parent / "data" / "learning"
24
- self.lessons_dir = self.data_dir / "lessons"
25
- self.users_dir = self.data_dir / "users"
26
-
27
- # Ensure directories exist
28
- self.users_dir.mkdir(parents=True, exist_ok=True)
29
-
30
- logger.info(f"Learning data directory: {self.data_dir}")
31
- logger.info(f"Lessons directory: {self.lessons_dir}")
32
- logger.info(f"Users directory: {self.users_dir}")
33
-
34
- # ==================== Lesson Data ====================
35
-
36
- def get_lessons_index(self, language: str = 'swahili') -> Optional[Dict]:
37
- """Load the lessons index/catalog for a specific language"""
38
- try:
39
- # Map language codes to folder names
40
- language_map = {
41
- 'swahili': 'swahili',
42
- 'swa': 'swahili',
43
- 'kamba': 'kamba',
44
- 'kam': 'kamba',
45
- 'maasai': 'maasai',
46
- 'mas': 'maasai'
47
- }
48
-
49
- language_folder = language_map.get(language.lower(), 'swahili')
50
- index_path = self.lessons_dir / language_folder / "index.json"
51
-
52
- logger.info(f"Loading lessons index for language '{language}' -> folder '{language_folder}' at {index_path}")
53
-
54
- if not index_path.exists():
55
- logger.warning(f"Lessons index not found at {index_path}")
56
- logger.info(f"Lessons dir contents: {list(self.lessons_dir.iterdir())}")
57
- return None
58
-
59
- with open(index_path, 'r', encoding='utf-8') as f:
60
- data = json.load(f)
61
- logger.info(f"Successfully loaded {len(data.get('lessons', []))} lessons for {language}")
62
- return data
63
- except Exception as e:
64
- logger.error(f"Error loading lessons index for {language}: {e}")
65
- return None
66
-
67
- def get_lesson(self, lesson_id: int, language: str = 'swahili') -> Optional[Dict]:
68
- """Load a specific lesson by ID for a specific language"""
69
- try:
70
- # Map language codes to folder names
71
- language_map = {
72
- 'swahili': 'swahili',
73
- 'swa': 'swahili',
74
- 'kamba': 'kamba',
75
- 'kam': 'kamba',
76
- 'maasai': 'maasai',
77
- 'mas': 'maasai'
78
- }
79
-
80
- language_folder = language_map.get(language.lower(), 'swahili')
81
-
82
- # First get the index to find the lesson file
83
- index = self.get_lessons_index(language)
84
- if not index:
85
- return None
86
-
87
- # Find the lesson in the index
88
- lesson_meta = None
89
- for lesson in index.get('lessons', []):
90
- if lesson['lesson_id'] == lesson_id:
91
- lesson_meta = lesson
92
- break
93
-
94
- if not lesson_meta:
95
- logger.warning(f"Lesson {lesson_id} not found in index for {language}")
96
- return None
97
-
98
- # Load the lesson file
99
- lesson_path = self.lessons_dir / language_folder / lesson_meta['file']
100
- if not lesson_path.exists():
101
- logger.warning(f"Lesson file not found: {lesson_path}")
102
- return None
103
-
104
- with open(lesson_path, 'r', encoding='utf-8') as f:
105
- return json.load(f)
106
- except Exception as e:
107
- logger.error(f"Error loading lesson {lesson_id} for {language}: {e}")
108
- return None
109
-
110
- def get_available_lessons(self) -> List[Dict]:
111
- """Get list of available lessons (not planned)"""
112
- try:
113
- index = self.get_lessons_index()
114
- if not index:
115
- return []
116
-
117
- available = [
118
- lesson for lesson in index.get('lessons', [])
119
- if lesson.get('status') == 'available'
120
- ]
121
- return available
122
- except Exception as e:
123
- logger.error(f"Error getting available lessons: {e}")
124
- return []
125
-
126
- # ==================== Achievements ====================
127
-
128
- def get_achievements(self) -> Optional[Dict]:
129
- """Load achievements configuration"""
130
- try:
131
- achievements_path = self.data_dir / "achievements.json"
132
- if not achievements_path.exists():
133
- logger.warning(f"Achievements file not found at {achievements_path}")
134
- return None
135
-
136
- with open(achievements_path, 'r', encoding='utf-8') as f:
137
- return json.load(f)
138
- except Exception as e:
139
- logger.error(f"Error loading achievements: {e}")
140
- return None
141
-
142
- # ==================== User Progress ====================
143
-
144
- def get_user_progress(self, user_id: str) -> Optional[Dict]:
145
- """Load user progress data"""
146
- try:
147
- user_file = self.users_dir / f"user-{user_id}.json"
148
- if not user_file.exists():
149
- # Return default progress structure for new users
150
- return self._create_default_user_progress(user_id)
151
-
152
- with open(user_file, 'r', encoding='utf-8') as f:
153
- return json.load(f)
154
- except Exception as e:
155
- logger.error(f"Error loading user progress for {user_id}: {e}")
156
- return None
157
-
158
- def save_user_progress(self, user_id: str, progress_data: Dict) -> bool:
159
- """Save user progress data"""
160
- try:
161
- user_file = self.users_dir / f"user-{user_id}.json"
162
-
163
- # Update last_active timestamp
164
- if 'profile' in progress_data:
165
- progress_data['profile']['last_active'] = datetime.utcnow().isoformat() + 'Z'
166
-
167
- with open(user_file, 'w', encoding='utf-8') as f:
168
- json.dump(progress_data, f, indent=2, ensure_ascii=False)
169
-
170
- logger.info(f"Saved progress for user {user_id}")
171
- return True
172
- except Exception as e:
173
- logger.error(f"Error saving user progress for {user_id}: {e}")
174
- return False
175
-
176
- def update_lesson_progress(
177
- self,
178
- user_id: str,
179
- lesson_id: int,
180
- progress_update: Dict
181
- ) -> bool:
182
- """Update progress for a specific lesson"""
183
- try:
184
- user_progress = self.get_user_progress(user_id)
185
- if not user_progress:
186
- return False
187
-
188
- # Initialize lesson_progress if it doesn't exist
189
- if 'lesson_progress' not in user_progress:
190
- user_progress['lesson_progress'] = {}
191
-
192
- lesson_key = str(lesson_id)
193
-
194
- # Update or create lesson progress
195
- if lesson_key in user_progress['lesson_progress']:
196
- user_progress['lesson_progress'][lesson_key].update(progress_update)
197
- else:
198
- user_progress['lesson_progress'][lesson_key] = progress_update
199
-
200
- return self.save_user_progress(user_id, user_progress)
201
- except Exception as e:
202
- logger.error(f"Error updating lesson progress: {e}")
203
- return False
204
-
205
- def update_vocabulary_progress(
206
- self,
207
- user_id: str,
208
- vocab_id: int,
209
- vocab_update: Dict
210
- ) -> bool:
211
- """Update progress for a specific vocabulary word"""
212
- try:
213
- user_progress = self.get_user_progress(user_id)
214
- if not user_progress:
215
- return False
216
-
217
- # Initialize vocabulary_progress if it doesn't exist
218
- if 'vocabulary_progress' not in user_progress:
219
- user_progress['vocabulary_progress'] = {}
220
-
221
- vocab_key = str(vocab_id)
222
-
223
- # Update or create vocabulary progress
224
- if vocab_key in user_progress['vocabulary_progress']:
225
- user_progress['vocabulary_progress'][vocab_key].update(vocab_update)
226
- else:
227
- user_progress['vocabulary_progress'][vocab_key] = vocab_update
228
-
229
- return self.save_user_progress(user_id, user_progress)
230
- except Exception as e:
231
- logger.error(f"Error updating vocabulary progress: {e}")
232
- return False
233
-
234
- def unlock_achievement(
235
- self,
236
- user_id: str,
237
- achievement_id: str,
238
- progress: int,
239
- target: int
240
- ) -> bool:
241
- """Unlock or update progress on an achievement"""
242
- try:
243
- user_progress = self.get_user_progress(user_id)
244
- if not user_progress:
245
- return False
246
-
247
- # Initialize achievements if it doesn't exist
248
- if 'achievements' not in user_progress:
249
- user_progress['achievements'] = {}
250
-
251
- # Update achievement
252
- achievement_data = {
253
- 'achievement_id': achievement_id,
254
- 'unlocked': progress >= target,
255
- 'progress': progress,
256
- 'target': target
257
- }
258
-
259
- # Add unlock timestamp if newly unlocked
260
- if achievement_data['unlocked'] and achievement_id not in user_progress['achievements']:
261
- achievement_data['unlocked_at'] = datetime.utcnow().isoformat() + 'Z'
262
- elif achievement_data['unlocked'] and achievement_id in user_progress['achievements']:
263
- # Preserve original unlock time
264
- if 'unlocked_at' in user_progress['achievements'][achievement_id]:
265
- achievement_data['unlocked_at'] = user_progress['achievements'][achievement_id]['unlocked_at']
266
- else:
267
- achievement_data['unlocked_at'] = datetime.utcnow().isoformat() + 'Z'
268
-
269
- user_progress['achievements'][achievement_id] = achievement_data
270
-
271
- return self.save_user_progress(user_id, user_progress)
272
- except Exception as e:
273
- logger.error(f"Error unlocking achievement: {e}")
274
- return False
275
-
276
- # ==================== Helper Methods ====================
277
-
278
- def _create_default_user_progress(self, user_id: str) -> Dict:
279
- """Create default progress structure for a new user"""
280
- return {
281
- 'user_id': user_id,
282
- 'profile': {
283
- 'user_id': user_id,
284
- 'learning_language': 'swa',
285
- 'native_language': 'eng',
286
- 'created_at': datetime.utcnow().isoformat() + 'Z',
287
- 'last_active': datetime.utcnow().isoformat() + 'Z'
288
- },
289
- 'overall_stats': {
290
- 'level': 'beginner',
291
- 'total_xp': 0,
292
- 'next_level_xp': 1000,
293
- 'current_streak': 0,
294
- 'longest_streak': 0,
295
- 'lessons_completed': 0,
296
- 'vocabulary_learned': 0,
297
- 'vocabulary_mastered': 0,
298
- 'total_practice_time_seconds': 0,
299
- 'pronunciation_avg_score': 0.0,
300
- 'listening_avg_score': 0.0,
301
- 'reading_avg_score': 0.0
302
- },
303
- 'daily_stats': {},
304
- 'lesson_progress': {},
305
- 'vocabulary_progress': {},
306
- 'achievements': {},
307
- 'session_history': []
308
- }
309
-
310
- def create_default_progress(self, user_id: str) -> Dict:
311
- """Public method to create default progress structure"""
312
- progress = self._create_default_user_progress(user_id)
313
- # Add Phase 1-3 specific fields
314
- progress['overall_stats']['vocabulary_reviewed'] = 0
315
- progress['comprehension_scores'] = {}
316
- progress['scenario_progress'] = {}
317
- return progress
318
-
319
- # ==================== Phase 1-3 Methods ====================
320
-
321
- def get_vocabulary(self, vocab_id: int) -> Optional[Dict]:
322
- """Get a single vocabulary word by ID from any lesson"""
323
- try:
324
- lessons_index = self.get_lessons_index()
325
- if not lessons_index:
326
- return None
327
-
328
- # Search through all lessons
329
- for lesson_meta in lessons_index.get('lessons', []):
330
- lesson = self.get_lesson(lesson_meta['lesson_id'])
331
- if lesson and 'vocabulary' in lesson:
332
- for vocab in lesson['vocabulary']:
333
- # Support both 'id' and 'vocabulary_id' fields
334
- vocab_item_id = vocab.get('vocabulary_id') or vocab.get('id')
335
- if vocab_item_id == vocab_id:
336
- # Add lesson context
337
- vocab['lesson_id'] = lesson['lesson_id']
338
- vocab['lesson_title'] = lesson.get('title', '')
339
- return vocab
340
-
341
- logger.warning(f"Vocabulary {vocab_id} not found in any lesson")
342
- return None
343
- except Exception as e:
344
- logger.error(f"Error getting vocabulary {vocab_id}: {e}")
345
- return None
346
-
347
- def get_all_vocabulary(self) -> List[Dict]:
348
- """Get all vocabulary words from all lessons"""
349
- try:
350
- all_vocab = []
351
- lessons_index = self.get_lessons_index()
352
- if not lessons_index:
353
- return all_vocab
354
-
355
- for lesson_meta in lessons_index.get('lessons', []):
356
- lesson = self.get_lesson(lesson_meta['lesson_id'])
357
- if lesson and 'vocabulary' in lesson:
358
- for vocab in lesson['vocabulary']:
359
- # Add lesson context
360
- vocab_copy = vocab.copy()
361
- vocab_copy['lesson_id'] = lesson['lesson_id']
362
- vocab_copy['lesson_title'] = lesson.get('title', '')
363
- vocab_copy['lesson_level'] = lesson.get('difficulty_level', 1)
364
- all_vocab.append(vocab_copy)
365
-
366
- return all_vocab
367
- except Exception as e:
368
- logger.error(f"Error getting all vocabulary: {e}")
369
- return []
370
-
371
- def get_scenario(self, scenario_id: str) -> Optional[Dict]:
372
- """Load a task scenario by ID"""
373
- try:
374
- scenarios_dir = self.data_dir / "scenarios"
375
- scenario_path = scenarios_dir / f"{scenario_id}.json"
376
-
377
- if not scenario_path.exists():
378
- logger.warning(f"Scenario file not found: {scenario_path}")
379
- return None
380
-
381
- with open(scenario_path, 'r', encoding='utf-8') as f:
382
- return json.load(f)
383
- except Exception as e:
384
- logger.error(f"Error loading scenario {scenario_id}: {e}")
385
- return None
386
-
387
- def get_all_scenarios(self) -> List[Dict]:
388
- """Get list of all available scenarios"""
389
- try:
390
- scenarios_dir = self.data_dir / "scenarios"
391
- if not scenarios_dir.exists():
392
- return []
393
-
394
- scenarios = []
395
- for scenario_file in scenarios_dir.glob("*.json"):
396
- try:
397
- with open(scenario_file, 'r', encoding='utf-8') as f:
398
- scenario_data = json.load(f)
399
- # Add just metadata, not full dialogue tree
400
- scenarios.append({
401
- 'scenario_id': scenario_data.get('scenario_id'),
402
- 'title': scenario_data.get('title'),
403
- 'title_en': scenario_data.get('title_en'),
404
- 'level': scenario_data.get('level'),
405
- 'estimated_duration_minutes': scenario_data.get('estimated_duration_minutes'),
406
- 'learning_goals': scenario_data.get('learning_goals', [])
407
- })
408
- except Exception as e:
409
- logger.error(f"Error loading scenario {scenario_file}: {e}")
410
- continue
411
-
412
- return scenarios
413
- except Exception as e:
414
- logger.error(f"Error getting all scenarios: {e}")
415
- return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/quantization_utils.py DELETED
@@ -1,124 +0,0 @@
1
- """
2
- Dynamic INT8 Quantization utilities for ASR models.
3
-
4
- This module provides utilities to apply PyTorch dynamic quantization to
5
- Hugging Face transformer models, specifically optimized for ASR models like
6
- Whisper and Wav2Vec2-BERT.
7
- """
8
-
9
- import torch
10
- from torch.quantization import quantize_dynamic
11
- from transformers import PreTrainedModel
12
- import time
13
-
14
-
15
- def apply_dynamic_int8_quantization(model: PreTrainedModel, model_type: str = "auto") -> PreTrainedModel:
16
- """
17
- Apply dynamic INT8 quantization to a Hugging Face model.
18
-
19
- Dynamic quantization converts model weights to INT8 and activations to INT8 on-the-fly
20
- during inference, reducing model size and improving inference speed with minimal
21
- accuracy loss.
22
-
23
- Args:
24
- model: The Hugging Face model to quantize
25
- model_type: Type of model ("whisper", "wav2vec2-bert", or "auto")
26
-
27
- Returns:
28
- Quantized model
29
-
30
- References:
31
- - PyTorch Quantization: https://pytorch.org/docs/stable/quantization.html
32
- - Dynamic Quantization for NLP: https://pytorch.org/tutorials/recipes/recipes/dynamic_quantization.html
33
- """
34
- print(f"\n{'='*60}")
35
- print(f"Applying Dynamic INT8 Quantization to {model_type} model")
36
- print(f"{'='*60}")
37
-
38
- # Get model size before quantization
39
- param_size = 0
40
- for param in model.parameters():
41
- param_size += param.nelement() * param.element_size()
42
- buffer_size = 0
43
- for buffer in model.buffers():
44
- buffer_size += buffer.nelement() * buffer.element_size()
45
- size_before_mb = (param_size + buffer_size) / 1024**2
46
-
47
- print(f"Model size before quantization: {size_before_mb:.2f} MB")
48
-
49
- # Start quantization timer
50
- start_time = time.time()
51
-
52
- try:
53
- # Dynamic quantization targets:
54
- # - torch.nn.Linear: Most common layer type in transformers
55
- # - torch.nn.LSTM/GRU/RNN: For sequential models (if present)
56
- #
57
- # Note: We use qint8 (quantized int8) which converts weights to INT8
58
- # and performs INT8 arithmetic for linear layers during inference
59
- quantized_model = quantize_dynamic(
60
- model,
61
- {torch.nn.Linear}, # Quantize all Linear layers
62
- dtype=torch.qint8 # Use 8-bit integer quantization
63
- )
64
-
65
- # Get model size after quantization
66
- param_size_q = 0
67
- for param in quantized_model.parameters():
68
- param_size_q += param.nelement() * param.element_size()
69
- buffer_size_q = 0
70
- for buffer in quantized_model.buffers():
71
- buffer_size_q += buffer.nelement() * buffer.element_size()
72
- size_after_mb = (param_size_q + buffer_size_q) / 1024**2
73
-
74
- quantization_time = time.time() - start_time
75
- size_reduction = ((size_before_mb - size_after_mb) / size_before_mb) * 100
76
-
77
- print(f"✓ Quantization successful!")
78
- print(f" - Model size after quantization: {size_after_mb:.2f} MB")
79
- print(f" - Size reduction: {size_reduction:.1f}%")
80
- print(f" - Quantization time: {quantization_time:.2f}s")
81
- print(f"{'='*60}\n")
82
-
83
- return quantized_model
84
-
85
- except Exception as e:
86
- print(f"✗ Quantization failed: {e}")
87
- print(f" Returning original unquantized model")
88
- print(f"{'='*60}\n")
89
- return model
90
-
91
-
92
- def get_quantization_stats(model: PreTrainedModel) -> dict:
93
- """
94
- Get statistics about a model's quantization status.
95
-
96
- Args:
97
- model: The model to analyze
98
-
99
- Returns:
100
- Dictionary with quantization statistics
101
- """
102
- stats = {
103
- "is_quantized": False,
104
- "quantized_layers": 0,
105
- "total_layers": 0,
106
- "size_mb": 0.0
107
- }
108
-
109
- # Count quantized vs regular layers
110
- for name, module in model.named_modules():
111
- if isinstance(module, (torch.nn.Linear, torch.nn.LSTM, torch.nn.GRU)):
112
- stats["total_layers"] += 1
113
-
114
- # Check if layer is quantized (will have _packed_params attribute)
115
- if hasattr(module, '_packed_params'):
116
- stats["quantized_layers"] += 1
117
- stats["is_quantized"] = True
118
-
119
- # Calculate model size
120
- param_size = sum(p.nelement() * p.element_size() for p in model.parameters())
121
- buffer_size = sum(b.nelement() * b.element_size() for b in model.buffers())
122
- stats["size_mb"] = (param_size + buffer_size) / 1024**2
123
-
124
- return stats
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/session_manager.py DELETED
@@ -1,180 +0,0 @@
1
- import uuid
2
- import random
3
- import string
4
- from typing import Dict, List, Optional
5
- from app.models import Session, SessionCreate, Participant, Language, LanguageCode
6
-
7
- def generate_short_code(length: int = 8) -> str:
8
- """Generate a random short code using uppercase letters and digits"""
9
- # Use only uppercase letters and digits to avoid confusion (no lowercase to avoid O/0, I/1 confusion)
10
- alphabet = string.ascii_uppercase + string.digits
11
- # Remove confusing characters
12
- alphabet = alphabet.replace('O', '').replace('0', '').replace('I', '').replace('1', '')
13
- return ''.join(random.choice(alphabet) for _ in range(length))
14
-
15
- # Language mappings
16
- LANGUAGE_MAP = {
17
- LanguageCode.ENGLISH: Language(code=LanguageCode.ENGLISH, name="English", display_name="English (eng)"),
18
- LanguageCode.SWAHILI: Language(code=LanguageCode.SWAHILI, name="Swahili", display_name="Swahili (swa)"),
19
- LanguageCode.KIKUYU: Language(code=LanguageCode.KIKUYU, name="Kikuyu", display_name="Kikuyu (kik)"),
20
- LanguageCode.KAMBA: Language(code=LanguageCode.KAMBA, name="Kamba", display_name="Kamba (kam)"),
21
- LanguageCode.KIMERU: Language(code=LanguageCode.KIMERU, name="Kimeru", display_name="Kimeru (mer)"),
22
- LanguageCode.LUO: Language(code=LanguageCode.LUO, name="Luo", display_name="Luo (luo)"),
23
- LanguageCode.SOMALI: Language(code=LanguageCode.SOMALI, name="Somali", display_name="Somali (som)"),
24
- }
25
-
26
- class SessionManager:
27
- def __init__(self):
28
- self.sessions: Dict[str, Session] = {}
29
- self.participant_sessions: Dict[str, str] = {} # participant_id -> session_id
30
- self.short_code_to_id: Dict[str, str] = {} # short_code -> session_id
31
- self.id_to_short_code: Dict[str, str] = {} # session_id -> short_code
32
-
33
- async def create_session(self, session_data: SessionCreate) -> Session:
34
- session_id = str(uuid.uuid4())
35
-
36
- # Generate unique short code
37
- short_code = generate_short_code(8)
38
- while short_code in self.short_code_to_id:
39
- # Extremely unlikely collision, but regenerate if needed
40
- short_code = generate_short_code(8)
41
-
42
- # Convert language codes to Language objects
43
- languages = [LANGUAGE_MAP[lang_code] for lang_code in session_data.languages]
44
-
45
- session = Session(
46
- id=session_id,
47
- name=session_data.name,
48
- organizer_name=session_data.organizer_name,
49
- languages=languages,
50
- participants=[],
51
- is_active=True,
52
- enable_tts=session_data.enable_tts
53
- )
54
-
55
- self.sessions[session_id] = session
56
- self.short_code_to_id[short_code] = session_id
57
- self.id_to_short_code[session_id] = short_code
58
- return session
59
-
60
- async def get_session(self, session_id_or_code: str) -> Optional[Session]:
61
- """Get session by full UUID or short code"""
62
- # Try as full UUID first
63
- session = self.sessions.get(session_id_or_code)
64
- if session:
65
- return session
66
-
67
- # Try as short code
68
- session_id = self.short_code_to_id.get(session_id_or_code.upper())
69
- if session_id:
70
- return self.sessions.get(session_id)
71
-
72
- return None
73
-
74
- def get_short_code(self, session_id: str) -> str:
75
- """Get short code for a session ID"""
76
- return self.id_to_short_code.get(session_id, session_id)
77
-
78
- async def get_all_sessions(self) -> List[Session]:
79
- return list(self.sessions.values())
80
-
81
- async def add_participant(self, session_id: str, participant_name: str, language_code: LanguageCode) -> Optional[Participant]:
82
- session = await self.get_session(session_id)
83
- if not session:
84
- return None
85
-
86
- participant_id = str(uuid.uuid4())
87
- language = LANGUAGE_MAP[language_code]
88
-
89
- # Check if the participant's language is already in the session languages
90
- language_exists = any(lang.code == language_code for lang in session.languages)
91
- if not language_exists:
92
- print(f"Adding new language {language.name} ({language_code.value}) to session {session_id}")
93
- session.languages.append(language)
94
-
95
- participant = Participant(
96
- id=participant_id,
97
- name=participant_name,
98
- language=language,
99
- is_organizer=len(session.participants) == 0, # First participant is organizer
100
- is_speaking=False,
101
- is_connected=True
102
- )
103
-
104
- session.participants.append(participant)
105
- self.participant_sessions[participant_id] = session_id
106
-
107
- print(f"Participant {participant_name} added to session. Session now has {len(session.languages)} languages: {[lang.name for lang in session.languages]}")
108
-
109
- return participant
110
-
111
- async def remove_participant(self, participant_id: str) -> bool:
112
- session_id = self.participant_sessions.get(participant_id)
113
- if not session_id:
114
- return False
115
-
116
- session = await self.get_session(session_id)
117
- if not session:
118
- return False
119
-
120
- # Remove participant from session
121
- session.participants = [p for p in session.participants if p.id != participant_id]
122
- del self.participant_sessions[participant_id]
123
-
124
- return True
125
-
126
- async def update_participant_speaking_status(self, participant_id: str, is_speaking: bool) -> bool:
127
- session_id = self.participant_sessions.get(participant_id)
128
- if not session_id:
129
- return False
130
-
131
- session = await self.get_session(session_id)
132
- if not session:
133
- return False
134
-
135
- for participant in session.participants:
136
- if participant.id == participant_id:
137
- participant.is_speaking = is_speaking
138
- return True
139
-
140
- return False
141
-
142
- async def get_participant_session_id(self, participant_id: str) -> Optional[str]:
143
- return self.participant_sessions.get(participant_id)
144
-
145
- async def add_language_to_session(self, session_id: str, language_code: LanguageCode) -> bool:
146
- """Add a language to the session if it doesn't already exist"""
147
- session = await self.get_session(session_id)
148
- if not session:
149
- return False
150
-
151
- language = LANGUAGE_MAP[language_code]
152
-
153
- # Check if the language is already in the session languages
154
- language_exists = any(lang.code == language_code for lang in session.languages)
155
- if not language_exists:
156
- print(f"Adding new language {language.name} ({language_code.value}) to session {session_id}")
157
- session.languages.append(language)
158
- print(f"Session {session_id} now has {len(session.languages)} languages: {[lang.name for lang in session.languages]}")
159
- return True
160
- else:
161
- print(f"Language {language.name} ({language_code.value}) already exists in session {session_id}")
162
- return False
163
-
164
- async def delete_session(self, session_id: str) -> bool:
165
- if session_id in self.sessions:
166
- # Remove all participants from tracking
167
- session = self.sessions[session_id]
168
- for participant in session.participants:
169
- if participant.id in self.participant_sessions:
170
- del self.participant_sessions[participant.id]
171
-
172
- # Remove short code mapping
173
- short_code = self.id_to_short_code.get(session_id)
174
- if short_code:
175
- del self.short_code_to_id[short_code]
176
- del self.id_to_short_code[session_id]
177
-
178
- del self.sessions[session_id]
179
- return True
180
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/transcription_service.py DELETED
@@ -1,736 +0,0 @@
1
- import asyncio
2
- import io
3
- import wave
4
- import numpy as np
5
- import time
6
- from typing import Dict, Optional, Callable
7
- from transformers import pipeline
8
- import torch
9
- from app.models import LanguageCode
10
- import os
11
- from app.services.quantization_utils import apply_dynamic_int8_quantization, get_quantization_stats
12
-
13
- # Silero VAD imports
14
- try:
15
- import silero_vad
16
- SILERO_VAD_AVAILABLE = True
17
- except ImportError:
18
- SILERO_VAD_AVAILABLE = False
19
- print("Warning: silero-vad not installed. Falling back to RMS-based VAD.")
20
-
21
- class TranscriptionService:
22
- def __init__(self):
23
- self.asr_pipelines: Dict[str, any] = {}
24
- self.device = 0 if torch.cuda.is_available() else -1
25
-
26
- # Model configurations - using original mutisya models with updated config
27
- self.asr_config = {
28
- "eng": {"model_repo": "openai/whisper-base.en", "model_type": "whisper"},
29
- "swa": {"model_repo": "mutisya/w2v-bert-2.0-asr-swh-superv-v25-37-1", "model_type": "wav2vec2-bert", "trust_remote_code": True},
30
- "kik": {"model_repo": "mutisya/w2v-bert-2.0-asr-kik-superv-v25-37-1", "model_type": "wav2vec2-bert", "trust_remote_code": True},
31
- "kam": {"model_repo": "mutisya/w2v-bert-2.0-asr-kam-superv-v25-36-1", "model_type": "wav2vec2-bert", "trust_remote_code": True},
32
- "mer": {"model_repo": "mutisya/w2v-bert-2.0-asr-mer-superv-v25-36-1", "model_type": "wav2vec2-bert", "trust_remote_code": True},
33
- "luo": {"model_repo": "mutisya/w2v-bert-2.0-asr-luo-superv-v25-36-1", "model_type": "wav2vec2-bert", "trust_remote_code": True},
34
- "som": {"model_repo": "mutisya/w2v-bert-2.0-asr-som-superv-v25-36-1", "model_type": "wav2vec2-bert", "trust_remote_code": True}
35
- }
36
-
37
- self.preload_languages = ["eng"]
38
- self.background_loading_task = None
39
- self.models_loading_status = {}
40
-
41
- # Enhanced audio buffering for VAD-based sentence detection
42
- self.candidate_audio_buffers: Dict[str, bytes] = {} # participant_id -> candidate audio buffer
43
- self.candidate_text_cache: Dict[str, str] = {} # participant_id -> current candidate text
44
- self.silence_counters: Dict[str, int] = {} # participant_id -> consecutive silence chunks
45
- self.sentence_finalized: Dict[str, bool] = {} # participant_id -> whether current sentence is already finalized
46
-
47
- # VAD parameters - made more lenient for better detection
48
- self.silence_threshold = 1 # Number of consecutive silent chunks before sentence break (1 second for natural pauses)
49
- self.min_sentence_length = 0.03 # Minimum sentence length in seconds (very short)
50
-
51
- # Silero VAD initialization
52
- self.vad_model = None
53
- self.vad_sample_rate = 16000
54
- self.vad_available = SILERO_VAD_AVAILABLE
55
-
56
- # Quantization configuration
57
- # Set ENABLE_INT8_QUANTIZATION=true in environment to enable quantization
58
- self.enable_quantization = os.getenv('ENABLE_INT8_QUANTIZATION', 'true').lower() == 'true'
59
- print(f"INT8 Quantization: {'ENABLED' if self.enable_quantization else 'DISABLED'}")
60
-
61
- async def initialize(self):
62
- """Initialize ASR models for preloaded languages and Silero VAD"""
63
- # Initialize Silero VAD model
64
- if self.vad_available:
65
- try:
66
- print("Loading Silero VAD model...")
67
- self.vad_model = silero_vad.load_silero_vad(onnx=False)
68
- print("✓ Silero VAD model loaded successfully")
69
- except Exception as e:
70
- print(f"Failed to load Silero VAD model: {e}")
71
- print("Falling back to RMS-based VAD")
72
- self.vad_available = False
73
-
74
- # Initialize ASR models
75
- for lang_code in self.preload_languages:
76
- if lang_code in self.asr_config:
77
- try:
78
- model_config = self.asr_config[lang_code]
79
- pipeline_obj = self._load_and_quantize_pipeline(lang_code, model_config)
80
- self.asr_pipelines[lang_code] = pipeline_obj
81
- except Exception as e:
82
- print(f"Failed to load ASR model for {lang_code}: {e}")
83
-
84
- def _load_and_quantize_pipeline(self, lang_code: str, model_config: dict):
85
- """Load ASR pipeline and optionally apply INT8 quantization"""
86
- # Build pipeline parameters
87
- pipeline_params = {
88
- "task": "automatic-speech-recognition",
89
- "model": model_config["model_repo"],
90
- "device": self.device,
91
- "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32
92
- }
93
-
94
- # Add trust_remote_code if specified
95
- if model_config.get("trust_remote_code", False):
96
- pipeline_params["trust_remote_code"] = True
97
-
98
- print(f"Loading ASR model for {lang_code}: {model_config['model_repo']}")
99
- pipeline_obj = pipeline(**pipeline_params)
100
-
101
- # Apply quantization if enabled
102
- if self.enable_quantization:
103
- try:
104
- # Get the underlying model from the pipeline
105
- model = pipeline_obj.model
106
- model_type = model_config.get("model_type", "auto")
107
-
108
- # Apply dynamic INT8 quantization
109
- quantized_model = apply_dynamic_int8_quantization(model, model_type)
110
-
111
- # Replace the model in the pipeline
112
- pipeline_obj.model = quantized_model
113
-
114
- # Print quantization stats
115
- stats = get_quantization_stats(quantized_model)
116
- print(f"✓ {lang_code} model quantized: {stats['quantized_layers']}/{stats['total_layers']} layers, {stats['size_mb']:.2f} MB")
117
-
118
- except Exception as e:
119
- print(f"Warning: Could not quantize {lang_code} model: {e}")
120
- print(f"Continuing with unquantized model")
121
-
122
- return pipeline_obj
123
-
124
- async def ensure_model_loaded(self, language_code: str):
125
- """Load ASR model for language if not already loaded"""
126
- if language_code not in self.asr_pipelines and language_code in self.asr_config:
127
- try:
128
- model_config = self.asr_config[language_code]
129
- pipeline_obj = self._load_and_quantize_pipeline(language_code, model_config)
130
- self.asr_pipelines[language_code] = pipeline_obj
131
- except Exception as e:
132
- print(f"Failed to load ASR model for {language_code}: {e}")
133
- raise
134
-
135
- async def process_audio_chunk(self, audio_data: bytes, language_code: str, participant_id: str,
136
- has_voice_activity: bool = True,
137
- progress_callback: Optional[Callable] = None,
138
- sentence_callback: Optional[Callable] = None,
139
- debug_callback: Optional[Callable] = None) -> str:
140
- """Process audio chunk with VAD-based sentence detection"""
141
- try:
142
- # Initialize buffers if needed
143
- if participant_id not in self.candidate_audio_buffers:
144
- # Store as numpy array, not bytes, to avoid multiple WAV header issues
145
- self.candidate_audio_buffers[participant_id] = np.array([], dtype=np.float32)
146
- self.candidate_text_cache[participant_id] = ""
147
- self.silence_counters[participant_id] = 0
148
- self.sentence_finalized[participant_id] = False
149
-
150
- # Convert current chunk to numpy array for processing
151
- current_chunk_array = self._bytes_to_audio_array(audio_data)
152
- if len(current_chunk_array) == 0:
153
- print(f"WARNING: Received empty audio chunk for participant {participant_id}")
154
- return self.candidate_text_cache.get(participant_id, "")
155
-
156
- print(f"DEBUG: Received audio chunk - bytes: {len(audio_data)}, samples: {len(current_chunk_array)}, "
157
- f"duration: {len(current_chunk_array)/16000:.3f}s, "
158
- f"first 4 bytes: {audio_data[:4]}")
159
-
160
- # DO NOT normalize individual chunks - this causes audio distortion
161
- # We'll normalize the entire accumulated audio buffer before transcription
162
- current_chunk_array = current_chunk_array.astype(np.float32)
163
-
164
- # Get existing accumulated audio array (now stored as numpy array)
165
- existing_array = self.candidate_audio_buffers[participant_id]
166
- if len(existing_array) > 0:
167
- # Concatenate with existing audio (like stream = np.concatenate([stream, y]))
168
- combined_array = np.concatenate([existing_array, current_chunk_array])
169
- else:
170
- combined_array = current_chunk_array
171
-
172
- # Store as numpy array to avoid WAV header accumulation issues
173
- self.candidate_audio_buffers[participant_id] = combined_array
174
-
175
- # For debug callback, convert to bytes (this adds ONE WAV header)
176
- combined_bytes = self._audio_array_to_bytes(combined_array)
177
-
178
- # Update silence counter based on voice activity
179
- if not has_voice_activity:
180
- self.silence_counters[participant_id] += 1
181
- else:
182
- self.silence_counters[participant_id] = 0
183
-
184
- # Check if we should finalize sentence due to prolonged silence
185
- should_finalize = (self.silence_counters[participant_id] >= self.silence_threshold and
186
- len(combined_array) > 0 and
187
- not self.sentence_finalized[participant_id])
188
-
189
- if should_finalize:
190
- return await self._finalize_candidate_sentence(
191
- language_code, participant_id, sentence_callback
192
- )
193
-
194
- # Always run transcription on the accumulated audio
195
- audio_duration_sec = len(combined_array) / 16000.0 # 16kHz sample rate
196
-
197
- # Minimum duration check - ignore very short audio bursts
198
- MIN_CHUNK_DURATION = 0.3 # 300ms minimum
199
- if audio_duration_sec < MIN_CHUNK_DURATION:
200
- print(f"Audio chunk too short: {audio_duration_sec:.3f}s < {MIN_CHUNK_DURATION}s, skipping transcription")
201
- if progress_callback:
202
- cached_text = self.candidate_text_cache.get(participant_id, "")
203
- await progress_callback(cached_text, False)
204
- return self.candidate_text_cache.get(participant_id, "")
205
-
206
- # Force finalization if buffer gets too long (prevent infinite accumulation)
207
- if audio_duration_sec > 15.0 and not self.sentence_finalized[participant_id]: # Force completion after 15 seconds
208
- return await self._finalize_candidate_sentence(
209
- language_code, participant_id, sentence_callback
210
- )
211
-
212
- # Run voice activity detection on the accumulated audio before transcription
213
- has_voice_in_buffer = self.has_meaningful_voice_activity(combined_bytes)
214
-
215
- if not has_voice_in_buffer:
216
- # Still send progress update with cached text to maintain UI state
217
- if progress_callback:
218
- cached_text = self.candidate_text_cache.get(participant_id, "")
219
- await progress_callback(cached_text, False)
220
- return self.candidate_text_cache.get(participant_id, "")
221
-
222
- # Run transcription
223
- await self.ensure_model_loaded(language_code)
224
-
225
- # Double-check voice activity before running expensive ASR
226
- has_voice_for_asr = self.has_voice_activity(combined_bytes)
227
- if not has_voice_for_asr:
228
- print(f"ASR: No voice activity detected in audio buffer for participant {participant_id}, skipping ASR execution")
229
- # Return cached text and send progress update
230
- if progress_callback:
231
- cached_text = self.candidate_text_cache.get(participant_id, "")
232
- await progress_callback(cached_text, False)
233
- return self.candidate_text_cache.get(participant_id, "")
234
-
235
- if language_code not in self.asr_pipelines:
236
- raise ValueError(f"ASR model not available for language: {language_code}")
237
-
238
- print(f"ASR: Running transcription for participant {participant_id} with {len(combined_array)/16000:.2f}s of audio")
239
- pipeline_obj = self.asr_pipelines[language_code]
240
-
241
- # Normalize the ENTIRE accumulated audio buffer before transcription
242
- # This prevents audio distortion from per-chunk normalization
243
- normalized_array = combined_array.astype(np.float32)
244
- max_val = np.max(np.abs(normalized_array))
245
- if max_val > 0:
246
- normalized_array = normalized_array / max_val
247
-
248
- # Track transcription latency
249
- transcription_start_time = time.time()
250
-
251
- # For wav2vec2 models, request word timestamps
252
- model_type = self.asr_config[language_code].get("model_type", "whisper")
253
- if model_type in ["wav2vec2-bert", "wav2vec2"]:
254
- result = pipeline_obj(
255
- {"sampling_rate": 16000, "raw": normalized_array},
256
- return_timestamps="word"
257
- )
258
- else:
259
- # Whisper model - add anti-hallucination parameters
260
- # Note: HuggingFace pipeline uses different parameter names than OpenAI Whisper
261
- result = pipeline_obj(
262
- {"sampling_rate": 16000, "raw": normalized_array},
263
- return_timestamps=True,
264
- chunk_length_s=30, # Process in 30s chunks
265
- stride_length_s=5 # 5s stride for context
266
- )
267
-
268
- transcription_latency_ms = (time.time() - transcription_start_time) * 1000
269
-
270
- candidate_text = result.get("text", "").strip()
271
- word_timestamps = result.get("chunks", []) if model_type in ["wav2vec2-bert", "wav2vec2"] else None
272
-
273
- # Send debug information if callback provided (for wav2vec2 models only)
274
- if debug_callback and word_timestamps is not None:
275
- debug_info = {
276
- "text": candidate_text,
277
- "timestamps": word_timestamps,
278
- "audio_data": combined_bytes,
279
- "audio_duration": audio_duration_sec,
280
- "model_type": model_type,
281
- "transcription_latency_ms": transcription_latency_ms
282
- }
283
- await debug_callback(debug_info)
284
-
285
- # Filter out common ASR artifacts and very short responses
286
- artifacts = [
287
- "thank you", "thanks", "bye", ".", ",", "?", "!",
288
- "um", "uh", "ah", "hmm", "mm", "mhm",
289
- "you", "the", "a", "an", "and", "but", "or",
290
- "music", "laughter", "applause", "[music]", "[laughter]",
291
- # Common Whisper hallucinations:
292
- "subscribe", "subtitles", "amara", "www", "http",
293
- "please subscribe", "like and subscribe",
294
- "thank you for watching", "don't forget to subscribe",
295
- "[blank_audio]", "[noise]", "[silence]",
296
- ]
297
-
298
- # Check if the result is likely an artifact
299
- is_artifact = (
300
- len(candidate_text) < 3 or # Very short
301
- candidate_text.lower() in artifacts or # Common artifacts
302
- len(candidate_text.split()) == 1 and len(candidate_text) < 6 # Single very short word
303
- )
304
-
305
- if is_artifact:
306
- # Keep the previous cached text instead of updating with artifact
307
- candidate_text = self.candidate_text_cache.get(participant_id, "")
308
-
309
- # Cache the current candidate text
310
- self.candidate_text_cache[participant_id] = candidate_text
311
-
312
- # Force completion if we have a reasonable amount of text and some silence
313
- word_count = len(candidate_text.split()) if candidate_text else 0
314
- if (word_count >= 3 and self.silence_counters[participant_id] >= 2 and
315
- not self.sentence_finalized[participant_id]): # At least 3 words and 2 silent chunks
316
- return await self._finalize_candidate_sentence(
317
- language_code, participant_id, sentence_callback
318
- )
319
-
320
- # Always send progress update
321
- if progress_callback:
322
- await progress_callback(candidate_text, False)
323
-
324
- return candidate_text
325
-
326
- except Exception as e:
327
- print(f"TranscriptionService: Error processing audio chunk: {e}")
328
- import traceback
329
- traceback.print_exc()
330
- # Even on error, try to send cached text
331
- if progress_callback:
332
- cached_text = self.candidate_text_cache.get(participant_id, "")
333
- await progress_callback(cached_text, False)
334
- return self.candidate_text_cache.get(participant_id, "")
335
-
336
- async def _finalize_candidate_sentence(self, language_code: str, participant_id: str,
337
- sentence_callback: Optional[Callable] = None) -> str:
338
- """Finalize the current candidate sentence and clear buffers"""
339
- try:
340
- # Check if sentence was already finalized
341
- if self.sentence_finalized.get(participant_id, False):
342
- print(f"Sentence for participant {participant_id} already finalized, skipping duplicate")
343
- return self.candidate_text_cache.get(participant_id, "")
344
-
345
- final_text = self.candidate_text_cache.get(participant_id, "")
346
- final_audio_array = self.candidate_audio_buffers.get(participant_id, np.array([], dtype=np.float32))
347
-
348
- # Convert audio array to bytes for VAD check and callback
349
- final_audio_bytes = self._audio_array_to_bytes(final_audio_array) if len(final_audio_array) > 0 else b''
350
-
351
- if final_text and len(final_text.strip()) > 0:
352
- # Run VAD check on the final accumulated buffer before sending for translation
353
- if len(final_audio_bytes) > 0:
354
- has_voice_in_final = self.has_meaningful_voice_activity(final_audio_bytes)
355
- if not has_voice_in_final:
356
- print(f"Finalize: No voice activity in final buffer for participant {participant_id}, discarding sentence: '{final_text}'")
357
- # Clear buffers without sending to translation
358
- self.candidate_audio_buffers[participant_id] = np.array([], dtype=np.float32)
359
- self.candidate_text_cache[participant_id] = ""
360
- self.silence_counters[participant_id] = 0
361
- self.sentence_finalized[participant_id] = False
362
- return ""
363
-
364
- # Mark as finalized BEFORE calling the callback to prevent race conditions
365
- self.sentence_finalized[participant_id] = True
366
-
367
- # Send to sentence callback for translation
368
- if sentence_callback and len(final_audio_bytes) > 0:
369
- print(f"Finalizing sentence for participant {participant_id}: '{final_text}'")
370
- await sentence_callback(final_text, final_audio_bytes)
371
-
372
- # Clear buffers for next sentence
373
- self.candidate_audio_buffers[participant_id] = np.array([], dtype=np.float32)
374
- self.candidate_text_cache[participant_id] = ""
375
- self.silence_counters[participant_id] = 0
376
- self.sentence_finalized[participant_id] = False # Reset for next sentence
377
-
378
- return final_text
379
-
380
- except Exception as e:
381
- print(f"Error finalizing sentence: {e}")
382
- import traceback
383
- traceback.print_exc()
384
- # Reset finalized flag on error
385
- self.sentence_finalized[participant_id] = False
386
- return ""
387
-
388
- def has_voice_activity(self, audio_data: bytes, threshold: float = 0.5) -> bool:
389
- """Voice Activity Detection using Silero VAD (with RMS fallback)"""
390
- try:
391
- audio_array = self._bytes_to_audio_array(audio_data)
392
- if len(audio_array) == 0:
393
- print("VAD: No audio array, returning False")
394
- return False
395
-
396
- # Normalize audio to float32 range [-1, 1]
397
- audio_array = audio_array.astype(np.float32)
398
- if np.max(np.abs(audio_array)) > 0:
399
- audio_array /= np.max(np.abs(audio_array))
400
-
401
- # Use Silero VAD if available
402
- if self.vad_available and self.vad_model is not None:
403
- try:
404
- # Silero VAD expects 512 samples (32ms) or 1536 samples (96ms) for 16kHz
405
- # Process audio in chunks and average the probabilities
406
- frame_size = 512 # 32ms at 16kHz
407
- num_samples = len(audio_array)
408
-
409
- # If audio is too short, pad it
410
- if num_samples < frame_size:
411
- audio_array = np.pad(audio_array, (0, frame_size - num_samples), mode='constant')
412
- num_samples = frame_size
413
-
414
- # Process in frames and collect probabilities
415
- speech_probs = []
416
- for i in range(0, num_samples, frame_size):
417
- frame = audio_array[i:i + frame_size]
418
- if len(frame) < frame_size:
419
- # Pad last frame if needed
420
- frame = np.pad(frame, (0, frame_size - len(frame)), mode='constant')
421
-
422
- # Convert to torch tensor
423
- frame_tensor = torch.from_numpy(frame).float()
424
-
425
- # Get speech probability from Silero VAD
426
- with torch.no_grad():
427
- prob = self.vad_model(frame_tensor, self.vad_sample_rate).item()
428
- speech_probs.append(prob)
429
-
430
- # Average probability across all frames
431
- speech_prob = np.mean(speech_probs)
432
- has_voice = speech_prob > threshold
433
-
434
- print(f"VAD: Silero speech_prob={speech_prob:.4f} (avg of {len(speech_probs)} frames), threshold={threshold}, RESULT={has_voice}")
435
-
436
- return has_voice
437
-
438
- except Exception as e:
439
- print(f"Silero VAD error: {e}, falling back to RMS-based VAD")
440
- # Fall through to RMS-based VAD below
441
-
442
- # Fallback: RMS-based VAD (original implementation)
443
- rms_threshold = 0.002
444
- rms = np.sqrt(np.mean(audio_array ** 2))
445
- peak = np.max(np.abs(audio_array))
446
- audio_std = np.std(audio_array)
447
- zero_crossing_rate = np.sum(np.diff(np.sign(audio_array)) != 0) / len(audio_array)
448
-
449
- has_voice_rms = rms > rms_threshold
450
- has_voice_peak = peak > rms_threshold * 3
451
- has_voice_variation = audio_std > rms_threshold * 0.8
452
- has_voice_zcr = zero_crossing_rate > 0.008
453
-
454
- has_voice = has_voice_rms or (has_voice_peak and has_voice_variation) or has_voice_zcr
455
-
456
- print(f"VAD: RMS-based - RMS={rms:.6f}({has_voice_rms}), peak={peak:.6f}({has_voice_peak}), std={audio_std:.6f}({has_voice_variation}), zcr={zero_crossing_rate:.6f}({has_voice_zcr}), RESULT={has_voice}")
457
-
458
- return has_voice
459
-
460
- except Exception as e:
461
- print(f"Error in VAD: {e}")
462
- return True # Default to assuming voice activity on error
463
-
464
- def has_meaningful_voice_activity(self, audio_data: bytes, threshold: float = 0.005) -> bool:
465
- """Stricter VAD check specifically for pre-transcription filtering"""
466
- try:
467
- audio_array = self._bytes_to_audio_array(audio_data)
468
- if len(audio_array) == 0:
469
- return False
470
-
471
- # Normalize audio
472
- audio_array = audio_array.astype(np.float32)
473
- if np.max(np.abs(audio_array)) > 0:
474
- audio_array /= np.max(np.abs(audio_array))
475
-
476
- # Calculate features with higher thresholds for meaningful speech
477
- rms = np.sqrt(np.mean(audio_array ** 2))
478
- peak = np.max(np.abs(audio_array))
479
- audio_std = np.std(audio_array)
480
- zero_crossing_rate = np.sum(np.diff(np.sign(audio_array)) != 0) / len(audio_array)
481
-
482
- # Higher thresholds for meaningful speech detection
483
- has_meaningful_voice = (
484
- rms > threshold and
485
- peak > threshold * 2 and
486
- audio_std > threshold * 0.5 and
487
- zero_crossing_rate > 0.015 # Higher ZCR threshold for meaningful speech
488
- )
489
-
490
- return has_meaningful_voice
491
-
492
- except Exception as e:
493
- print(f"Error in meaningful VAD: {e}")
494
- return False # Default to no meaningful voice on error
495
-
496
- async def force_complete_sentence(self, participant_id: str, language_code: str, sentence_callback: Optional[Callable] = None) -> str:
497
- """Force complete any pending sentence for a participant"""
498
- try:
499
- # Check if sentence was already finalized
500
- if self.sentence_finalized.get(participant_id, False):
501
- print(f"Force completion: Sentence for participant {participant_id} already finalized, skipping")
502
- return ""
503
-
504
- if participant_id in self.candidate_text_cache:
505
- cached_text = self.candidate_text_cache[participant_id]
506
-
507
- if cached_text and len(cached_text.strip()) > 0:
508
- result = await self._finalize_candidate_sentence(language_code, participant_id, sentence_callback)
509
- return result
510
-
511
- return ""
512
-
513
- except Exception as e:
514
- print(f"Error in force_complete_sentence: {e}")
515
- import traceback
516
- traceback.print_exc()
517
- return ""
518
-
519
- async def transcribe_audio(self, audio_data: bytes, language_code: str, callback: Optional[Callable] = None) -> str:
520
- """Transcribe audio data to text"""
521
- try:
522
- # Check for voice activity before running ASR
523
- has_voice = self.has_voice_activity(audio_data)
524
- if not has_voice:
525
- print(f"ASR: No voice activity detected in audio data, skipping transcription")
526
- return ""
527
-
528
- await self.ensure_model_loaded(language_code)
529
-
530
- if language_code not in self.asr_pipelines:
531
- raise ValueError(f"ASR model not available for language: {language_code}")
532
-
533
- # Convert audio bytes to numpy array
534
- audio_array = self._bytes_to_audio_array(audio_data)
535
-
536
- print(f"ASR: Running transcription with {len(audio_array)/16000:.2f}s of audio")
537
- # Transcribe
538
- pipeline_obj = self.asr_pipelines[language_code]
539
- result = pipeline_obj({"sampling_rate": 16000, "raw": audio_array})
540
-
541
- text = result.get("text", "")
542
-
543
- if callback:
544
- await callback(text)
545
-
546
- return text
547
-
548
- except Exception as e:
549
- print(f"TranscriptionService: Transcription error: {e}")
550
- import traceback
551
- traceback.print_exc()
552
- return ""
553
-
554
- def _bytes_to_audio_array(self, audio_data: bytes) -> np.ndarray:
555
- """Convert audio bytes to numpy array (supports WAV, WebM/Opus)"""
556
- try:
557
- # Detect format by checking magic bytes
558
- is_webm = audio_data[:4] == b'\x1a\x45\xdf\xa3' # WebM/Matroska magic bytes
559
- is_wav = audio_data[:4] == b'RIFF'
560
-
561
- import sys
562
- print(f"_bytes_to_audio_array: length={len(audio_data)}, first 4 bytes={audio_data[:4]}, is_wav={is_wav}", flush=True)
563
- sys.stdout.flush()
564
-
565
- # Handle raw PCM (16-bit, 48kHz from extendable-media-recorder)
566
- # This is the most common case for microphone input
567
- if not is_wav and not is_webm and len(audio_data) > 0:
568
- try:
569
- # Assume 16-bit PCM at 48kHz (browser's native rate)
570
- audio_array = np.frombuffer(audio_data, dtype=np.int16)
571
-
572
- # Check if this looks like valid audio data (not NaN, reasonable range)
573
- if len(audio_array) > 0 and not np.isnan(audio_array).any():
574
- print(f"Raw PCM: {len(audio_array)} samples, assuming 48kHz 16-bit", flush=True)
575
-
576
- # Convert to float32 and normalize
577
- audio_float = audio_array.astype(np.float32) / 32768.0
578
-
579
- # Resample from 48kHz to 16kHz
580
- import librosa
581
- audio_array = librosa.resample(audio_float, orig_sr=48000, target_sr=16000)
582
- print(f"Resampled to 16kHz: {len(audio_array)} samples", flush=True)
583
-
584
- return audio_array
585
- except Exception as pcm_error:
586
- print(f"TranscriptionService: Raw PCM decoding error: {pcm_error}", flush=True)
587
- # Fall through to other methods
588
-
589
- if is_webm:
590
- # Decode WebM/Opus using pydub (requires ffmpeg)
591
- try:
592
- from pydub import AudioSegment
593
- audio_io = io.BytesIO(audio_data)
594
- audio_segment = AudioSegment.from_file(audio_io, format="webm")
595
-
596
- # Convert to mono 16kHz
597
- audio_segment = audio_segment.set_channels(1)
598
- audio_segment = audio_segment.set_frame_rate(16000)
599
-
600
- # Convert to numpy array
601
- samples = np.array(audio_segment.get_array_of_samples(), dtype=np.int16)
602
- # Normalize to float32 [-1, 1]
603
- audio_array = samples.astype(np.float32) / 32768.0
604
- return audio_array
605
- except Exception as webm_error:
606
- print(f"TranscriptionService: WebM decoding error: {webm_error}")
607
- # Fall through to other methods
608
-
609
- if is_wav:
610
- # Decode WAV format (first chunk from frontend includes WAV header with sample rate)
611
- try:
612
- audio_io = io.BytesIO(audio_data)
613
- with wave.open(audio_io, 'rb') as wav_file:
614
- sample_rate = wav_file.getframerate()
615
- channels = wav_file.getnchannels()
616
- sample_width = wav_file.getsampwidth()
617
-
618
- print(f"WAV format: {sample_rate}Hz, {channels} channel(s), {sample_width*8}-bit", flush=True)
619
-
620
- frames = wav_file.readframes(-1)
621
- audio_array = np.frombuffer(frames, dtype=np.int16)
622
-
623
- # Resample if needed
624
- if sample_rate != 16000:
625
- print(f"WARNING: Resampling from {sample_rate}Hz to 16000Hz", flush=True)
626
- import librosa
627
- # Convert to float first
628
- audio_float = audio_array.astype(np.float32) / 32768.0
629
- # Resample
630
- audio_array = librosa.resample(audio_float, orig_sr=sample_rate, target_sr=16000)
631
- print(f"Resampled: {len(audio_array)} samples at 16kHz", flush=True)
632
- else:
633
- # Convert to float32 and normalize
634
- audio_array = audio_array.astype(np.float32) / 32768.0
635
-
636
- print(f"Returning audio array: {len(audio_array)} samples", flush=True)
637
- return audio_array
638
- except Exception as wav_error:
639
- print(f"TranscriptionService: WAV decoding error: {wav_error}")
640
- import traceback
641
- traceback.print_exc()
642
-
643
- # Fallback: assume raw float32 audio data
644
- try:
645
- audio_array = np.frombuffer(audio_data, dtype=np.float32)
646
- return audio_array
647
- except Exception:
648
- pass
649
-
650
- # Last resort: return empty array
651
- return np.array([], dtype=np.float32)
652
-
653
- except Exception as e:
654
- print(f"TranscriptionService: Audio conversion error: {e}")
655
- return np.array([], dtype=np.float32)
656
-
657
- def _audio_array_to_bytes(self, audio_array: np.ndarray) -> bytes:
658
- """Convert numpy audio array back to WAV bytes for storage"""
659
- try:
660
- # Ensure float32 format
661
- if audio_array.dtype != np.float32:
662
- audio_array = audio_array.astype(np.float32)
663
-
664
- # Convert to 16-bit PCM for WAV storage
665
- audio_int16 = (audio_array * 32767).astype(np.int16)
666
-
667
- # Create WAV bytes
668
- wav_buffer = io.BytesIO()
669
- with wave.open(wav_buffer, 'wb') as wav_file:
670
- wav_file.setnchannels(1) # Mono
671
- wav_file.setsampwidth(2) # 16-bit
672
- wav_file.setframerate(16000) # 16kHz
673
- wav_file.writeframes(audio_int16.tobytes())
674
-
675
- return wav_buffer.getvalue()
676
-
677
- except Exception as e:
678
- print(f"Error converting audio array to bytes: {e}")
679
- return b''
680
-
681
- def clear_participant_buffers(self, participant_id: str):
682
- """Clear all buffers for a participant (e.g., when they stop speaking or disconnect)"""
683
- if participant_id in self.candidate_audio_buffers:
684
- del self.candidate_audio_buffers[participant_id]
685
- if participant_id in self.candidate_text_cache:
686
- del self.candidate_text_cache[participant_id]
687
- if participant_id in self.silence_counters:
688
- del self.silence_counters[participant_id]
689
- if participant_id in self.sentence_finalized:
690
- del self.sentence_finalized[participant_id]
691
-
692
- async def load_remaining_models_in_background(self):
693
- """Load all remaining ASR models in the background after startup"""
694
- try:
695
- print("ASR: Starting background loading of additional language models...")
696
- for lang_code in self.asr_config.keys():
697
- if lang_code not in self.preload_languages and lang_code not in self.asr_pipelines:
698
- try:
699
- print(f"ASR: Background loading model for {lang_code}...")
700
- self.models_loading_status[lang_code] = "loading"
701
-
702
- model_config = self.asr_config[lang_code]
703
- # Use quantization helper for background loading too
704
- pipeline_obj = self._load_and_quantize_pipeline(lang_code, model_config)
705
- self.asr_pipelines[lang_code] = pipeline_obj
706
- self.models_loading_status[lang_code] = "loaded"
707
- print(f"ASR: Successfully loaded model for {lang_code} in background")
708
-
709
- # Add a small delay between loading models to prevent overwhelming the system
710
- await asyncio.sleep(2)
711
- except Exception as e:
712
- print(f"ASR: Failed to load model for {lang_code} in background: {e}")
713
- self.models_loading_status[lang_code] = "failed"
714
-
715
- print("ASR: Background loading of all language models complete")
716
- print(f"ASR: Loaded models: {list(self.asr_pipelines.keys())}")
717
- except Exception as e:
718
- print(f"ASR: Error in background model loading: {e}")
719
-
720
- def start_background_loading(self):
721
- """Start background loading of models as a non-blocking task"""
722
- if self.background_loading_task is None:
723
- self.background_loading_task = asyncio.create_task(self.load_remaining_models_in_background())
724
- print("ASR: Background model loading task started")
725
-
726
- async def cleanup(self):
727
- """Cleanup resources"""
728
- # Cancel background loading if still running
729
- if self.background_loading_task and not self.background_loading_task.done():
730
- self.background_loading_task.cancel()
731
- try:
732
- await self.background_loading_task
733
- except asyncio.CancelledError:
734
- pass
735
-
736
- self.asr_pipelines.clear()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/transcription_service.py.bak DELETED
@@ -1,726 +0,0 @@
1
- import asyncio
2
- import io
3
- import wave
4
- import numpy as np
5
- import time
6
- from typing import Dict, Optional, Callable
7
- from transformers import pipeline
8
- import torch
9
- from app.models import LanguageCode
10
- from app.services.performance_mixin import track_performance
11
-
12
- # Silero VAD imports
13
- try:
14
- import silero_vad
15
- SILERO_VAD_AVAILABLE = True
16
- except ImportError:
17
- SILERO_VAD_AVAILABLE = False
18
- print("Warning: silero-vad not installed. Falling back to RMS-based VAD.")
19
-
20
- class TranscriptionService:
21
- def __init__(self):
22
- self.asr_pipelines: Dict[str, any] = {}
23
- self.device = 0 if torch.cuda.is_available() else -1
24
-
25
- # Model configurations - using original mutisya models with updated config
26
- self.asr_config = {
27
- "eng": {"model_repo": "openai/whisper-base.en", "model_type": "whisper"},
28
- "swa": {"model_repo": "mutisya/w2v-bert-2.0-asr-swh-superv-v25-37-1", "model_type": "wav2vec2-bert", "trust_remote_code": True},
29
- "kik": {"model_repo": "mutisya/w2v-bert-2.0-asr-kik-superv-v25-37-1", "model_type": "wav2vec2-bert", "trust_remote_code": True},
30
- "kam": {"model_repo": "mutisya/w2v-bert-2.0-asr-kam-superv-v25-36-1", "model_type": "wav2vec2-bert", "trust_remote_code": True},
31
- "mer": {"model_repo": "mutisya/w2v-bert-2.0-asr-mer-superv-v25-36-1", "model_type": "wav2vec2-bert", "trust_remote_code": True},
32
- "luo": {"model_repo": "mutisya/w2v-bert-2.0-asr-luo-superv-v25-36-1", "model_type": "wav2vec2-bert", "trust_remote_code": True},
33
- "som": {"model_repo": "mutisya/w2v-bert-2.0-asr-som-superv-v25-36-1", "model_type": "wav2vec2-bert", "trust_remote_code": True}
34
- }
35
-
36
- self.preload_languages = ["eng"]
37
- self.background_loading_task = None
38
- self.models_loading_status = {}
39
-
40
- # Enhanced audio buffering for VAD-based sentence detection
41
- self.candidate_audio_buffers: Dict[str, bytes] = {} # participant_id -> candidate audio buffer
42
- self.candidate_text_cache: Dict[str, str] = {} # participant_id -> current candidate text
43
- self.silence_counters: Dict[str, int] = {} # participant_id -> consecutive silence chunks
44
- self.sentence_finalized: Dict[str, bool] = {} # participant_id -> whether current sentence is already finalized
45
-
46
- # VAD parameters - made more lenient for better detection
47
- self.silence_threshold = 1 # Number of consecutive silent chunks before sentence break (1 second for natural pauses)
48
- self.min_sentence_length = 0.03 # Minimum sentence length in seconds (very short)
49
-
50
- # Silero VAD initialization
51
- self.vad_model = None
52
- self.vad_sample_rate = 16000
53
- self.vad_available = SILERO_VAD_AVAILABLE
54
-
55
- async def initialize(self):
56
- """Initialize ASR models for preloaded languages and Silero VAD"""
57
- # Initialize Silero VAD model
58
- if self.vad_available:
59
- try:
60
- print("Loading Silero VAD model...")
61
- self.vad_model = silero_vad.load_silero_vad(onnx=False)
62
- print("✓ Silero VAD model loaded successfully")
63
- except Exception as e:
64
- print(f"Failed to load Silero VAD model: {e}")
65
- print("Falling back to RMS-based VAD")
66
- self.vad_available = False
67
-
68
- # Initialize ASR models
69
- for lang_code in self.preload_languages:
70
- if lang_code in self.asr_config:
71
- try:
72
- model_config = self.asr_config[lang_code]
73
- # Build pipeline parameters
74
- pipeline_params = {
75
- "task": "automatic-speech-recognition",
76
- "model": model_config["model_repo"],
77
- "device": self.device,
78
- "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32
79
- }
80
-
81
- # Add trust_remote_code if specified
82
- if model_config.get("trust_remote_code", False):
83
- pipeline_params["trust_remote_code"] = True
84
-
85
- pipeline_obj = pipeline(**pipeline_params)
86
- self.asr_pipelines[lang_code] = pipeline_obj
87
- except Exception as e:
88
- print(f"Failed to load ASR model for {lang_code}: {e}")
89
-
90
- async def ensure_model_loaded(self, language_code: str):
91
- """Load ASR model for language if not already loaded"""
92
- if language_code not in self.asr_pipelines and language_code in self.asr_config:
93
- try:
94
- model_config = self.asr_config[language_code]
95
- # Build pipeline parameters
96
- pipeline_params = {
97
- "task": "automatic-speech-recognition",
98
- "model": model_config["model_repo"],
99
- "device": self.device,
100
- "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32
101
- }
102
-
103
- # Add trust_remote_code if specified
104
- if model_config.get("trust_remote_code", False):
105
- pipeline_params["trust_remote_code"] = True
106
-
107
- pipeline_obj = pipeline(**pipeline_params)
108
- self.asr_pipelines[language_code] = pipeline_obj
109
- except Exception as e:
110
- print(f"Failed to load ASR model for {language_code}: {e}")
111
- raise
112
-
113
- async def process_audio_chunk(self, audio_data: bytes, language_code: str, participant_id: str,
114
- has_voice_activity: bool = True,
115
- progress_callback: Optional[Callable] = None,
116
- sentence_callback: Optional[Callable] = None,
117
- debug_callback: Optional[Callable] = None) -> str:
118
- """Process audio chunk with VAD-based sentence detection"""
119
- try:
120
- # Initialize buffers if needed
121
- if participant_id not in self.candidate_audio_buffers:
122
- # Store as numpy array, not bytes, to avoid multiple WAV header issues
123
- self.candidate_audio_buffers[participant_id] = np.array([], dtype=np.float32)
124
- self.candidate_text_cache[participant_id] = ""
125
- self.silence_counters[participant_id] = 0
126
- self.sentence_finalized[participant_id] = False
127
-
128
- # Convert current chunk to numpy array for processing
129
- current_chunk_array = self._bytes_to_audio_array(audio_data)
130
- if len(current_chunk_array) == 0:
131
- print(f"WARNING: Received empty audio chunk for participant {participant_id}")
132
- return self.candidate_text_cache.get(participant_id, "")
133
-
134
- print(f"DEBUG: Received audio chunk - bytes: {len(audio_data)}, samples: {len(current_chunk_array)}, "
135
- f"duration: {len(current_chunk_array)/16000:.3f}s, "
136
- f"first 4 bytes: {audio_data[:4]}")
137
-
138
- # DO NOT normalize individual chunks - this causes audio distortion
139
- # We'll normalize the entire accumulated audio buffer before transcription
140
- current_chunk_array = current_chunk_array.astype(np.float32)
141
-
142
- # Get existing accumulated audio array (now stored as numpy array)
143
- existing_array = self.candidate_audio_buffers[participant_id]
144
- if len(existing_array) > 0:
145
- # Concatenate with existing audio (like stream = np.concatenate([stream, y]))
146
- combined_array = np.concatenate([existing_array, current_chunk_array])
147
- else:
148
- combined_array = current_chunk_array
149
-
150
- # Store as numpy array to avoid WAV header accumulation issues
151
- self.candidate_audio_buffers[participant_id] = combined_array
152
-
153
- # For debug callback, convert to bytes (this adds ONE WAV header)
154
- combined_bytes = self._audio_array_to_bytes(combined_array)
155
-
156
- # Update silence counter based on voice activity
157
- if not has_voice_activity:
158
- self.silence_counters[participant_id] += 1
159
- else:
160
- self.silence_counters[participant_id] = 0
161
-
162
- # Check if we should finalize sentence due to prolonged silence
163
- should_finalize = (self.silence_counters[participant_id] >= self.silence_threshold and
164
- len(combined_array) > 0 and
165
- not self.sentence_finalized[participant_id])
166
-
167
- if should_finalize:
168
- return await self._finalize_candidate_sentence(
169
- language_code, participant_id, sentence_callback
170
- )
171
-
172
- # Always run transcription on the accumulated audio
173
- audio_duration_sec = len(combined_array) / 16000.0 # 16kHz sample rate
174
-
175
- # Minimum duration check - ignore very short audio bursts
176
- MIN_CHUNK_DURATION = 0.3 # 300ms minimum
177
- if audio_duration_sec < MIN_CHUNK_DURATION:
178
- print(f"Audio chunk too short: {audio_duration_sec:.3f}s < {MIN_CHUNK_DURATION}s, skipping transcription")
179
- if progress_callback:
180
- cached_text = self.candidate_text_cache.get(participant_id, "")
181
- await progress_callback(cached_text, False)
182
- return self.candidate_text_cache.get(participant_id, "")
183
-
184
- # Force finalization if buffer gets too long (prevent infinite accumulation)
185
- if audio_duration_sec > 15.0 and not self.sentence_finalized[participant_id]: # Force completion after 15 seconds
186
- return await self._finalize_candidate_sentence(
187
- language_code, participant_id, sentence_callback
188
- )
189
-
190
- # Run voice activity detection on the accumulated audio before transcription
191
- has_voice_in_buffer = self.has_meaningful_voice_activity(combined_bytes)
192
-
193
- if not has_voice_in_buffer:
194
- # Still send progress update with cached text to maintain UI state
195
- if progress_callback:
196
- cached_text = self.candidate_text_cache.get(participant_id, "")
197
- await progress_callback(cached_text, False)
198
- return self.candidate_text_cache.get(participant_id, "")
199
-
200
- # Run transcription
201
- await self.ensure_model_loaded(language_code)
202
-
203
- # Double-check voice activity before running expensive ASR
204
- has_voice_for_asr = self.has_voice_activity(combined_bytes)
205
- if not has_voice_for_asr:
206
- print(f"ASR: No voice activity detected in audio buffer for participant {participant_id}, skipping ASR execution")
207
- # Return cached text and send progress update
208
- if progress_callback:
209
- cached_text = self.candidate_text_cache.get(participant_id, "")
210
- await progress_callback(cached_text, False)
211
- return self.candidate_text_cache.get(participant_id, "")
212
-
213
- if language_code not in self.asr_pipelines:
214
- raise ValueError(f"ASR model not available for language: {language_code}")
215
-
216
- print(f"ASR: Running transcription for participant {participant_id} with {len(combined_array)/16000:.2f}s of audio")
217
- pipeline_obj = self.asr_pipelines[language_code]
218
-
219
- # Normalize the ENTIRE accumulated audio buffer before transcription
220
- # This prevents audio distortion from per-chunk normalization
221
- normalized_array = combined_array.astype(np.float32)
222
- max_val = np.max(np.abs(normalized_array))
223
- if max_val > 0:
224
- normalized_array = normalized_array / max_val
225
-
226
- # Track transcription latency
227
- transcription_start_time = time.time()
228
-
229
- # For wav2vec2 models, request word timestamps
230
- model_type = self.asr_config[language_code].get("model_type", "whisper")
231
- if model_type in ["wav2vec2-bert", "wav2vec2"]:
232
- result = pipeline_obj(
233
- {"sampling_rate": 16000, "raw": normalized_array},
234
- return_timestamps="word"
235
- )
236
- else:
237
- # Whisper model - add anti-hallucination parameters
238
- # Note: HuggingFace pipeline uses different parameter names than OpenAI Whisper
239
- result = pipeline_obj(
240
- {"sampling_rate": 16000, "raw": normalized_array},
241
- return_timestamps=True,
242
- chunk_length_s=30, # Process in 30s chunks
243
- stride_length_s=5 # 5s stride for context
244
- )
245
-
246
- transcription_latency_ms = (time.time() - transcription_start_time) * 1000
247
-
248
- candidate_text = result.get("text", "").strip()
249
- word_timestamps = result.get("chunks", []) if model_type in ["wav2vec2-bert", "wav2vec2"] else None
250
-
251
- # Send debug information if callback provided (for wav2vec2 models only)
252
- if debug_callback and word_timestamps is not None:
253
- debug_info = {
254
- "text": candidate_text,
255
- "timestamps": word_timestamps,
256
- "audio_data": combined_bytes,
257
- "audio_duration": audio_duration_sec,
258
- "model_type": model_type,
259
- "transcription_latency_ms": transcription_latency_ms
260
- }
261
- await debug_callback(debug_info)
262
-
263
- # Filter out common ASR artifacts and very short responses
264
- artifacts = [
265
- "thank you", "thanks", "bye", ".", ",", "?", "!",
266
- "um", "uh", "ah", "hmm", "mm", "mhm",
267
- "you", "the", "a", "an", "and", "but", "or",
268
- "music", "laughter", "applause", "[music]", "[laughter]",
269
- # Common Whisper hallucinations:
270
- "subscribe", "subtitles", "amara", "www", "http",
271
- "please subscribe", "like and subscribe",
272
- "thank you for watching", "don't forget to subscribe",
273
- "[blank_audio]", "[noise]", "[silence]",
274
- ]
275
-
276
- # Check if the result is likely an artifact
277
- is_artifact = (
278
- len(candidate_text) < 3 or # Very short
279
- candidate_text.lower() in artifacts or # Common artifacts
280
- len(candidate_text.split()) == 1 and len(candidate_text) < 6 # Single very short word
281
- )
282
-
283
- if is_artifact:
284
- # Keep the previous cached text instead of updating with artifact
285
- candidate_text = self.candidate_text_cache.get(participant_id, "")
286
-
287
- # Cache the current candidate text
288
- self.candidate_text_cache[participant_id] = candidate_text
289
-
290
- # Force completion if we have a reasonable amount of text and some silence
291
- word_count = len(candidate_text.split()) if candidate_text else 0
292
- if (word_count >= 3 and self.silence_counters[participant_id] >= 2 and
293
- not self.sentence_finalized[participant_id]): # At least 3 words and 2 silent chunks
294
- return await self._finalize_candidate_sentence(
295
- language_code, participant_id, sentence_callback
296
- )
297
-
298
- # Always send progress update
299
- if progress_callback:
300
- await progress_callback(candidate_text, False)
301
-
302
- return candidate_text
303
-
304
- except Exception as e:
305
- print(f"TranscriptionService: Error processing audio chunk: {e}")
306
- import traceback
307
- traceback.print_exc()
308
- # Even on error, try to send cached text
309
- if progress_callback:
310
- cached_text = self.candidate_text_cache.get(participant_id, "")
311
- await progress_callback(cached_text, False)
312
- return self.candidate_text_cache.get(participant_id, "")
313
-
314
- async def _finalize_candidate_sentence(self, language_code: str, participant_id: str,
315
- sentence_callback: Optional[Callable] = None) -> str:
316
- """Finalize the current candidate sentence and clear buffers"""
317
- try:
318
- # Check if sentence was already finalized
319
- if self.sentence_finalized.get(participant_id, False):
320
- print(f"Sentence for participant {participant_id} already finalized, skipping duplicate")
321
- return self.candidate_text_cache.get(participant_id, "")
322
-
323
- final_text = self.candidate_text_cache.get(participant_id, "")
324
- final_audio_array = self.candidate_audio_buffers.get(participant_id, np.array([], dtype=np.float32))
325
-
326
- # Convert audio array to bytes for VAD check and callback
327
- final_audio_bytes = self._audio_array_to_bytes(final_audio_array) if len(final_audio_array) > 0 else b''
328
-
329
- if final_text and len(final_text.strip()) > 0:
330
- # Run VAD check on the final accumulated buffer before sending for translation
331
- if len(final_audio_bytes) > 0:
332
- has_voice_in_final = self.has_meaningful_voice_activity(final_audio_bytes)
333
- if not has_voice_in_final:
334
- print(f"Finalize: No voice activity in final buffer for participant {participant_id}, discarding sentence: '{final_text}'")
335
- # Clear buffers without sending to translation
336
- self.candidate_audio_buffers[participant_id] = np.array([], dtype=np.float32)
337
- self.candidate_text_cache[participant_id] = ""
338
- self.silence_counters[participant_id] = 0
339
- self.sentence_finalized[participant_id] = False
340
- return ""
341
-
342
- # Mark as finalized BEFORE calling the callback to prevent race conditions
343
- self.sentence_finalized[participant_id] = True
344
-
345
- # Send to sentence callback for translation
346
- if sentence_callback and len(final_audio_bytes) > 0:
347
- print(f"Finalizing sentence for participant {participant_id}: '{final_text}'")
348
- await sentence_callback(final_text, final_audio_bytes)
349
-
350
- # Clear buffers for next sentence
351
- self.candidate_audio_buffers[participant_id] = np.array([], dtype=np.float32)
352
- self.candidate_text_cache[participant_id] = ""
353
- self.silence_counters[participant_id] = 0
354
- self.sentence_finalized[participant_id] = False # Reset for next sentence
355
-
356
- return final_text
357
-
358
- except Exception as e:
359
- print(f"Error finalizing sentence: {e}")
360
- import traceback
361
- traceback.print_exc()
362
- # Reset finalized flag on error
363
- self.sentence_finalized[participant_id] = False
364
- return ""
365
-
366
- def has_voice_activity(self, audio_data: bytes, threshold: float = 0.5) -> bool:
367
- """Voice Activity Detection using Silero VAD (with RMS fallback)"""
368
- try:
369
- audio_array = self._bytes_to_audio_array(audio_data)
370
- if len(audio_array) == 0:
371
- print("VAD: No audio array, returning False")
372
- return False
373
-
374
- # Normalize audio to float32 range [-1, 1]
375
- audio_array = audio_array.astype(np.float32)
376
- if np.max(np.abs(audio_array)) > 0:
377
- audio_array /= np.max(np.abs(audio_array))
378
-
379
- # Use Silero VAD if available
380
- if self.vad_available and self.vad_model is not None:
381
- try:
382
- # Silero VAD expects 512 samples (32ms) or 1536 samples (96ms) for 16kHz
383
- # Process audio in chunks and average the probabilities
384
- frame_size = 512 # 32ms at 16kHz
385
- num_samples = len(audio_array)
386
-
387
- # If audio is too short, pad it
388
- if num_samples < frame_size:
389
- audio_array = np.pad(audio_array, (0, frame_size - num_samples), mode='constant')
390
- num_samples = frame_size
391
-
392
- # Process in frames and collect probabilities
393
- speech_probs = []
394
- for i in range(0, num_samples, frame_size):
395
- frame = audio_array[i:i + frame_size]
396
- if len(frame) < frame_size:
397
- # Pad last frame if needed
398
- frame = np.pad(frame, (0, frame_size - len(frame)), mode='constant')
399
-
400
- # Convert to torch tensor
401
- frame_tensor = torch.from_numpy(frame).float()
402
-
403
- # Get speech probability from Silero VAD
404
- with torch.no_grad():
405
- prob = self.vad_model(frame_tensor, self.vad_sample_rate).item()
406
- speech_probs.append(prob)
407
-
408
- # Average probability across all frames
409
- speech_prob = np.mean(speech_probs)
410
- has_voice = speech_prob > threshold
411
-
412
- print(f"VAD: Silero speech_prob={speech_prob:.4f} (avg of {len(speech_probs)} frames), threshold={threshold}, RESULT={has_voice}")
413
-
414
- return has_voice
415
-
416
- except Exception as e:
417
- print(f"Silero VAD error: {e}, falling back to RMS-based VAD")
418
- # Fall through to RMS-based VAD below
419
-
420
- # Fallback: RMS-based VAD (original implementation)
421
- rms_threshold = 0.002
422
- rms = np.sqrt(np.mean(audio_array ** 2))
423
- peak = np.max(np.abs(audio_array))
424
- audio_std = np.std(audio_array)
425
- zero_crossing_rate = np.sum(np.diff(np.sign(audio_array)) != 0) / len(audio_array)
426
-
427
- has_voice_rms = rms > rms_threshold
428
- has_voice_peak = peak > rms_threshold * 3
429
- has_voice_variation = audio_std > rms_threshold * 0.8
430
- has_voice_zcr = zero_crossing_rate > 0.008
431
-
432
- has_voice = has_voice_rms or (has_voice_peak and has_voice_variation) or has_voice_zcr
433
-
434
- print(f"VAD: RMS-based - RMS={rms:.6f}({has_voice_rms}), peak={peak:.6f}({has_voice_peak}), std={audio_std:.6f}({has_voice_variation}), zcr={zero_crossing_rate:.6f}({has_voice_zcr}), RESULT={has_voice}")
435
-
436
- return has_voice
437
-
438
- except Exception as e:
439
- print(f"Error in VAD: {e}")
440
- return True # Default to assuming voice activity on error
441
-
442
- def has_meaningful_voice_activity(self, audio_data: bytes, threshold: float = 0.005) -> bool:
443
- """Stricter VAD check specifically for pre-transcription filtering"""
444
- try:
445
- audio_array = self._bytes_to_audio_array(audio_data)
446
- if len(audio_array) == 0:
447
- return False
448
-
449
- # Normalize audio
450
- audio_array = audio_array.astype(np.float32)
451
- if np.max(np.abs(audio_array)) > 0:
452
- audio_array /= np.max(np.abs(audio_array))
453
-
454
- # Calculate features with higher thresholds for meaningful speech
455
- rms = np.sqrt(np.mean(audio_array ** 2))
456
- peak = np.max(np.abs(audio_array))
457
- audio_std = np.std(audio_array)
458
- zero_crossing_rate = np.sum(np.diff(np.sign(audio_array)) != 0) / len(audio_array)
459
-
460
- # Higher thresholds for meaningful speech detection
461
- has_meaningful_voice = (
462
- rms > threshold and
463
- peak > threshold * 2 and
464
- audio_std > threshold * 0.5 and
465
- zero_crossing_rate > 0.015 # Higher ZCR threshold for meaningful speech
466
- )
467
-
468
- return has_meaningful_voice
469
-
470
- except Exception as e:
471
- print(f"Error in meaningful VAD: {e}")
472
- return False # Default to no meaningful voice on error
473
-
474
- async def force_complete_sentence(self, participant_id: str, language_code: str, sentence_callback: Optional[Callable] = None) -> str:
475
- """Force complete any pending sentence for a participant"""
476
- try:
477
- # Check if sentence was already finalized
478
- if self.sentence_finalized.get(participant_id, False):
479
- print(f"Force completion: Sentence for participant {participant_id} already finalized, skipping")
480
- return ""
481
-
482
- if participant_id in self.candidate_text_cache:
483
- cached_text = self.candidate_text_cache[participant_id]
484
-
485
- if cached_text and len(cached_text.strip()) > 0:
486
- result = await self._finalize_candidate_sentence(language_code, participant_id, sentence_callback)
487
- return result
488
-
489
- return ""
490
-
491
- except Exception as e:
492
- print(f"Error in force_complete_sentence: {e}")
493
- import traceback
494
- traceback.print_exc()
495
- return ""
496
-
497
- @track_performance("transcription", "transcribe_audio")
498
- async def transcribe_audio(self, audio_data: bytes, language_code: str, callback: Optional[Callable] = None) -> str:
499
- """Transcribe audio data to text"""
500
- try:
501
- # Check for voice activity before running ASR
502
- has_voice = self.has_voice_activity(audio_data)
503
- if not has_voice:
504
- print(f"ASR: No voice activity detected in audio data, skipping transcription")
505
- return ""
506
-
507
- await self.ensure_model_loaded(language_code)
508
-
509
- if language_code not in self.asr_pipelines:
510
- raise ValueError(f"ASR model not available for language: {language_code}")
511
-
512
- # Convert audio bytes to numpy array
513
- audio_array = self._bytes_to_audio_array(audio_data)
514
-
515
- print(f"ASR: Running transcription with {len(audio_array)/16000:.2f}s of audio")
516
- # Transcribe
517
- pipeline_obj = self.asr_pipelines[language_code]
518
- result = pipeline_obj({"sampling_rate": 16000, "raw": audio_array})
519
-
520
- text = result.get("text", "")
521
-
522
- if callback:
523
- await callback(text)
524
-
525
- return text
526
-
527
- except Exception as e:
528
- print(f"TranscriptionService: Transcription error: {e}")
529
- import traceback
530
- traceback.print_exc()
531
- return ""
532
-
533
- def _bytes_to_audio_array(self, audio_data: bytes) -> np.ndarray:
534
- """Convert audio bytes to numpy array (supports WAV, WebM/Opus)"""
535
- try:
536
- # Detect format by checking magic bytes
537
- is_webm = audio_data[:4] == b'\x1a\x45\xdf\xa3' # WebM/Matroska magic bytes
538
- is_wav = audio_data[:4] == b'RIFF'
539
-
540
- import sys
541
- print(f"_bytes_to_audio_array: length={len(audio_data)}, first 4 bytes={audio_data[:4]}, is_wav={is_wav}", flush=True)
542
- sys.stdout.flush()
543
-
544
- # Handle raw PCM (16-bit, 48kHz from extendable-media-recorder)
545
- # This is the most common case now that we strip WAV headers in frontend
546
- if not is_wav and not is_webm and len(audio_data) > 0:
547
- try:
548
- # Assume 16-bit PCM at 48kHz (browser's native rate)
549
- audio_array = np.frombuffer(audio_data, dtype=np.int16)
550
-
551
- # Check if this looks like valid audio data (not NaN, reasonable range)
552
- if len(audio_array) > 0 and not np.isnan(audio_array).any():
553
- print(f"Raw PCM: {len(audio_array)} samples, assuming 48kHz 16-bit", flush=True)
554
-
555
- # Convert to float32 and normalize
556
- audio_float = audio_array.astype(np.float32) / 32768.0
557
-
558
- # Resample from 48kHz to 16kHz
559
- import librosa
560
- audio_array = librosa.resample(audio_float, orig_sr=48000, target_sr=16000)
561
- print(f"Resampled to 16kHz: {len(audio_array)} samples", flush=True)
562
-
563
- return audio_array
564
- except Exception as pcm_error:
565
- print(f"TranscriptionService: Raw PCM decoding error: {pcm_error}", flush=True)
566
- # Fall through to other methods
567
-
568
- if is_webm:
569
- # Decode WebM/Opus using pydub (requires ffmpeg)
570
- try:
571
- from pydub import AudioSegment
572
- audio_io = io.BytesIO(audio_data)
573
- audio_segment = AudioSegment.from_file(audio_io, format="webm")
574
-
575
- # Convert to mono 16kHz
576
- audio_segment = audio_segment.set_channels(1)
577
- audio_segment = audio_segment.set_frame_rate(16000)
578
-
579
- # Convert to numpy array
580
- samples = np.array(audio_segment.get_array_of_samples(), dtype=np.int16)
581
- # Normalize to float32 [-1, 1]
582
- audio_array = samples.astype(np.float32) / 32768.0
583
- return audio_array
584
- except Exception as webm_error:
585
- print(f"TranscriptionService: WebM decoding error: {webm_error}")
586
- # Fall through to other methods
587
-
588
- if is_wav:
589
- # Decode WAV format
590
- try:
591
- audio_io = io.BytesIO(audio_data)
592
- with wave.open(audio_io, 'rb') as wav_file:
593
- sample_rate = wav_file.getframerate()
594
- channels = wav_file.getnchannels()
595
- sample_width = wav_file.getsampwidth()
596
-
597
- print(f"WAV format: {sample_rate}Hz, {channels} channel(s), {sample_width*8}-bit", flush=True)
598
-
599
- frames = wav_file.readframes(-1)
600
- audio_array = np.frombuffer(frames, dtype=np.int16)
601
-
602
- # Resample if needed
603
- if sample_rate != 16000:
604
- print(f"WARNING: Resampling from {sample_rate}Hz to 16000Hz", flush=True)
605
- import librosa
606
- # Convert to float first
607
- audio_float = audio_array.astype(np.float32) / 32768.0
608
- # Resample
609
- audio_array = librosa.resample(audio_float, orig_sr=sample_rate, target_sr=16000)
610
- print(f"Resampled: {len(audio_array)} samples at 16kHz", flush=True)
611
- else:
612
- # Convert to float32 and normalize
613
- audio_array = audio_array.astype(np.float32) / 32768.0
614
-
615
- print(f"Returning audio array: {len(audio_array)} samples", flush=True)
616
- return audio_array
617
- except Exception as wav_error:
618
- print(f"TranscriptionService: WAV decoding error: {wav_error}")
619
- import traceback
620
- traceback.print_exc()
621
-
622
- # Fallback: assume raw float32 audio data
623
- try:
624
- audio_array = np.frombuffer(audio_data, dtype=np.float32)
625
- return audio_array
626
- except Exception:
627
- pass
628
-
629
- # Last resort: return empty array
630
- return np.array([], dtype=np.float32)
631
-
632
- except Exception as e:
633
- print(f"TranscriptionService: Audio conversion error: {e}")
634
- return np.array([], dtype=np.float32)
635
-
636
- def _audio_array_to_bytes(self, audio_array: np.ndarray) -> bytes:
637
- """Convert numpy audio array back to WAV bytes for storage"""
638
- try:
639
- # Ensure float32 format
640
- if audio_array.dtype != np.float32:
641
- audio_array = audio_array.astype(np.float32)
642
-
643
- # Convert to 16-bit PCM for WAV storage
644
- audio_int16 = (audio_array * 32767).astype(np.int16)
645
-
646
- # Create WAV bytes
647
- wav_buffer = io.BytesIO()
648
- with wave.open(wav_buffer, 'wb') as wav_file:
649
- wav_file.setnchannels(1) # Mono
650
- wav_file.setsampwidth(2) # 16-bit
651
- wav_file.setframerate(16000) # 16kHz
652
- wav_file.writeframes(audio_int16.tobytes())
653
-
654
- return wav_buffer.getvalue()
655
-
656
- except Exception as e:
657
- print(f"Error converting audio array to bytes: {e}")
658
- return b''
659
-
660
- def clear_participant_buffers(self, participant_id: str):
661
- """Clear all buffers for a participant (e.g., when they stop speaking or disconnect)"""
662
- if participant_id in self.candidate_audio_buffers:
663
- del self.candidate_audio_buffers[participant_id]
664
- if participant_id in self.candidate_text_cache:
665
- del self.candidate_text_cache[participant_id]
666
- if participant_id in self.silence_counters:
667
- del self.silence_counters[participant_id]
668
- if participant_id in self.sentence_finalized:
669
- del self.sentence_finalized[participant_id]
670
-
671
- async def load_remaining_models_in_background(self):
672
- """Load all remaining ASR models in the background after startup"""
673
- try:
674
- print("ASR: Starting background loading of additional language models...")
675
- for lang_code in self.asr_config.keys():
676
- if lang_code not in self.preload_languages and lang_code not in self.asr_pipelines:
677
- try:
678
- print(f"ASR: Background loading model for {lang_code}...")
679
- self.models_loading_status[lang_code] = "loading"
680
-
681
- model_config = self.asr_config[lang_code]
682
- # Build pipeline parameters
683
- pipeline_params = {
684
- "task": "automatic-speech-recognition",
685
- "model": model_config["model_repo"],
686
- "device": self.device,
687
- "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32
688
- }
689
-
690
- # Add trust_remote_code if specified
691
- if model_config.get("trust_remote_code", False):
692
- pipeline_params["trust_remote_code"] = True
693
-
694
- pipeline_obj = pipeline(**pipeline_params)
695
- self.asr_pipelines[lang_code] = pipeline_obj
696
- self.models_loading_status[lang_code] = "loaded"
697
- print(f"ASR: Successfully loaded model for {lang_code} in background")
698
-
699
- # Add a small delay between loading models to prevent overwhelming the system
700
- await asyncio.sleep(2)
701
- except Exception as e:
702
- print(f"ASR: Failed to load model for {lang_code} in background: {e}")
703
- self.models_loading_status[lang_code] = "failed"
704
-
705
- print("ASR: Background loading of all language models complete")
706
- print(f"ASR: Loaded models: {list(self.asr_pipelines.keys())}")
707
- except Exception as e:
708
- print(f"ASR: Error in background model loading: {e}")
709
-
710
- def start_background_loading(self):
711
- """Start background loading of models as a non-blocking task"""
712
- if self.background_loading_task is None:
713
- self.background_loading_task = asyncio.create_task(self.load_remaining_models_in_background())
714
- print("ASR: Background model loading task started")
715
-
716
- async def cleanup(self):
717
- """Cleanup resources"""
718
- # Cancel background loading if still running
719
- if self.background_loading_task and not self.background_loading_task.done():
720
- self.background_loading_task.cancel()
721
- try:
722
- await self.background_loading_task
723
- except asyncio.CancelledError:
724
- pass
725
-
726
- self.asr_pipelines.clear()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/transcription_service_onnx.py DELETED
@@ -1,682 +0,0 @@
1
- import asyncio
2
- import io
3
- import wave
4
- import numpy as np
5
- from typing import Dict, Optional, Callable
6
- from collections import OrderedDict
7
- import onnxruntime as ort
8
- from transformers import AutoProcessor, WhisperProcessor
9
- from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
10
- import os
11
- from app.models import LanguageCode
12
-
13
- class ONNXTranscriptionService:
14
- def __init__(self):
15
- self.asr_models: Dict[str, any] = {}
16
- self.processors: Dict[str, any] = {}
17
- self.max_asr_models = 2 # Memory management - keep max 2 models loaded
18
- self.model_cache = OrderedDict() # LRU cache for models
19
-
20
- # GPU optimization - detect and configure providers
21
- available_providers = ort.get_available_providers()
22
- print(f"ONNX ASR: Available providers: {available_providers}")
23
-
24
- if 'CUDAExecutionProvider' in available_providers:
25
- # Configure CUDA provider with optimizations
26
- cuda_provider_options = {
27
- 'device_id': 0,
28
- 'arena_extend_strategy': 'kNextPowerOfTwo',
29
- 'gpu_mem_limit': int(0.8 * 1024 * 1024 * 1024), # 80% of GPU memory
30
- 'cudnn_conv_algo_search': 'EXHAUSTIVE',
31
- 'do_copy_in_default_stream': True,
32
- 'enable_tracing': True, # Enable tracing for better diagnostics
33
- }
34
-
35
- # Include TensorRT if available, then CUDA, then CPU
36
- provider_list = []
37
- if 'TensorrtExecutionProvider' in available_providers:
38
- provider_list.append('TensorrtExecutionProvider')
39
- provider_list.append(('CUDAExecutionProvider', cuda_provider_options))
40
- provider_list.append('CPUExecutionProvider')
41
-
42
- self.providers = provider_list
43
- print(f"ONNX ASR: Using GPU acceleration with providers: {[p[0] if isinstance(p, tuple) else p for p in provider_list]}")
44
- print(f"ONNX ASR: GPU memory limit: {cuda_provider_options['gpu_mem_limit'] // (1024**3)}GB")
45
- else:
46
- self.providers = ['CPUExecutionProvider']
47
- print("ONNX ASR: CUDA not available, using CPU execution")
48
-
49
- print(f"ONNX ASR: Configured providers: {[p[0] if isinstance(p, tuple) else p for p in self.providers]}")
50
-
51
- # ONNX Model configurations - using pre-converted ONNX models from HuggingFace
52
- self.asr_config = {
53
- "eng": {"model_repo": "mutisya/whisper-medium-en-onnx", "model_type": "whisper", "use_onnx": True}, # Pre-converted ONNX model
54
- "swa": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-swh-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True},
55
- "kik": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-kik-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True},
56
- "kam": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-kam-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True},
57
- "mer": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-mer-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True},
58
- "luo": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-luo-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True},
59
- "som": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-som-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True}
60
- }
61
-
62
- # Alternative model configurations for different performance tiers
63
- self.alternative_models = {
64
- "eng_small": {"model_repo": "mutisya/whisper-small-en-onnx", "model_type": "whisper", "use_onnx": True},
65
- "eng_base": {"model_repo": "mutisya/whisper-base-en-onnx", "model_type": "whisper", "use_onnx": True},
66
- "eng_medium": {"model_repo": "mutisya/whisper-medium-en-onnx", "model_type": "whisper", "use_onnx": True}
67
- }
68
-
69
- self.preload_languages = ["eng"]
70
-
71
- # Current model performance mode (small, base, medium)
72
- # Can be configured via environment variable WHISPER_MODEL_SIZE
73
- self.performance_mode = os.getenv("WHISPER_MODEL_SIZE", "medium").lower()
74
-
75
- # Enhanced audio buffering for VAD-based sentence detection
76
- self.candidate_audio_buffers: Dict[str, bytes] = {}
77
- self.candidate_text_cache: Dict[str, str] = {}
78
- self.silence_counters: Dict[str, int] = {}
79
- self.sentence_finalized: Dict[str, bool] = {}
80
-
81
- # VAD parameters
82
- self.silence_threshold = 2
83
- self.min_sentence_length = 0.03
84
-
85
- def set_performance_mode(self, mode: str):
86
- """Set the performance mode for English models (small, base, medium)"""
87
- if mode in ["small", "base", "medium"]:
88
- self.performance_mode = mode
89
- # Update the English model configuration based on performance mode
90
- if f"eng_{mode}" in self.alternative_models:
91
- self.asr_config["eng"] = self.alternative_models[f"eng_{mode}"]
92
- # Clear cached English model to force reload with new configuration
93
- if "eng" in self.model_cache:
94
- del self.model_cache["eng"]
95
- if "eng" in self.asr_models:
96
- del self.asr_models["eng"]
97
- if "eng" in self.processors:
98
- del self.processors["eng"]
99
- print(f"Performance mode set to {mode}. English model will be reloaded on next use.")
100
- else:
101
- print(f"Warning: No model configuration found for performance mode {mode}")
102
- else:
103
- print(f"Invalid performance mode: {mode}. Must be one of: small, base, medium")
104
-
105
- async def initialize(self):
106
- """Initialize ASR models for preloaded languages"""
107
- print(f"ONNX ASR: Initializing with providers: {self.providers}")
108
-
109
- # Apply performance mode to English model configuration
110
- if self.performance_mode in ["small", "base", "medium"]:
111
- if f"eng_{self.performance_mode}" in self.alternative_models:
112
- self.asr_config["eng"] = self.alternative_models[f"eng_{self.performance_mode}"]
113
- print(f"Using Whisper {self.performance_mode} model for English")
114
- else:
115
- print(f"Warning: Performance mode {self.performance_mode} not available, using default medium")
116
-
117
- for lang_code in self.preload_languages:
118
- if lang_code in self.asr_config:
119
- try:
120
- await self.ensure_model_loaded(lang_code)
121
- except Exception as e:
122
- print(f"Failed to load ASR model for {lang_code}: {e}")
123
-
124
- async def ensure_model_loaded(self, language_code: str):
125
- """Load ASR model for language if not already loaded with LRU cache"""
126
- if language_code in self.model_cache:
127
- # Move to end (most recently used)
128
- self.model_cache.move_to_end(language_code)
129
- return
130
-
131
- if language_code not in self.asr_config:
132
- raise ValueError(f"Language {language_code} not supported")
133
-
134
- model_config = self.asr_config[language_code]
135
-
136
- # Check if we need to evict old models
137
- while len(self.model_cache) >= self.max_asr_models:
138
- # Remove least recently used model
139
- old_lang, _ = self.model_cache.popitem(last=False)
140
- if old_lang in self.asr_models:
141
- del self.asr_models[old_lang]
142
- if old_lang in self.processors:
143
- del self.processors[old_lang]
144
- print(f"ONNX ASR: Evicted model for {old_lang} (LRU cache)")
145
-
146
- try:
147
- if model_config.get("use_onnx", False):
148
- # Load ONNX model
149
- print(f"ONNX ASR: Loading ONNX model for {language_code}")
150
-
151
- # Special handling for Whisper models
152
- if model_config.get("model_type") == "whisper":
153
- print(f"ONNX ASR: Loading Whisper ONNX model from {model_config['model_repo']}")
154
-
155
- # Get authentication token for private repos
156
- import os
157
- auth_token = os.getenv('HUGGING_FACE_HUB_TOKEN') or os.getenv('HF_TOKEN')
158
-
159
- # Load pre-converted Whisper ONNX model using Optimum
160
- load_kwargs = {
161
- # export=False because we're using pre-converted models
162
- "export": False,
163
- # use_cache=True because our models now include past key value variants for optimization
164
- "use_cache": True,
165
- # Add authentication token for private repos
166
- "token": auth_token
167
- }
168
-
169
- # Configure providers - pass all available providers to Optimum
170
- provider_names = [p[0] if isinstance(p, tuple) else p for p in self.providers]
171
- load_kwargs["providers"] = provider_names
172
- print(f"ONNX ASR: Whisper using providers: {provider_names}")
173
-
174
- # Add subfolder if specified (for models that store ONNX in subfolders)
175
- if "subfolder" in model_config:
176
- load_kwargs["subfolder"] = model_config["subfolder"]
177
-
178
- model = ORTModelForSpeechSeq2Seq.from_pretrained(
179
- model_config["model_repo"],
180
- **load_kwargs
181
- )
182
-
183
- # Load Whisper processor with authentication token
184
- processor = WhisperProcessor.from_pretrained(
185
- model_config["model_repo"],
186
- token=auth_token
187
- )
188
-
189
- # Configure for English transcription
190
- model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(
191
- language="en",
192
- task="transcribe"
193
- )
194
-
195
- self.asr_models[language_code] = model
196
- self.processors[language_code] = processor
197
-
198
- print(f"ONNX ASR: Successfully loaded Whisper ONNX model for {language_code}")
199
-
200
- else:
201
- # Original wav2vec2-bert model loading logic
202
- # Create ONNX session with optimizations and verbose logging
203
- session_options = ort.SessionOptions()
204
- session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
205
-
206
- # Enable verbose logging to diagnose operator assignments
207
- session_options.log_severity_level = 1 # WARNING level for detailed logs
208
- session_options.logid = "ONNX_ASR" # Prefix for log identification
209
-
210
- # Use configured providers with optimizations
211
- providers = self.providers
212
- print(f"ONNX ASR: wav2vec2-bert using providers: {[p[0] if isinstance(p, tuple) else p for p in providers]}")
213
-
214
- # Get authentication token for private repos
215
- import os
216
- auth_token = os.getenv('HUGGING_FACE_HUB_TOKEN') or os.getenv('HF_TOKEN')
217
-
218
- # Download model files from HuggingFace Hub with authentication
219
- from huggingface_hub import hf_hub_download
220
- onnx_path = hf_hub_download(
221
- repo_id=model_config["model_repo"],
222
- filename="model.onnx",
223
- token=auth_token
224
- )
225
-
226
- session = ort.InferenceSession(onnx_path, providers=providers, sess_options=session_options)
227
-
228
- # Load processor for preprocessing with authentication
229
- processor = AutoProcessor.from_pretrained(
230
- model_config["model_repo"],
231
- token=auth_token
232
- )
233
-
234
- self.asr_models[language_code] = session
235
- self.processors[language_code] = processor
236
-
237
- print(f"ONNX ASR: Successfully loaded ONNX model for {language_code}")
238
-
239
- else:
240
- # This service is ONNX-only - no PyTorch fallback
241
- raise ValueError(f"Language {language_code} is not configured for ONNX models. Set 'use_onnx': True in config.")
242
-
243
- # Add to cache
244
- self.model_cache[language_code] = True
245
-
246
- except Exception as e:
247
- print(f"Failed to load ASR model for {language_code}: {e}")
248
- raise
249
-
250
- async def process_audio_chunk(self, audio_data: bytes, language_code: str, participant_id: str,
251
- has_voice_activity: bool = True,
252
- progress_callback: Optional[Callable] = None,
253
- sentence_callback: Optional[Callable] = None) -> str:
254
- """Process audio chunk with VAD-based sentence detection using ONNX models"""
255
- try:
256
- # Initialize buffers if needed
257
- if participant_id not in self.candidate_audio_buffers:
258
- self.candidate_audio_buffers[participant_id] = b''
259
- self.candidate_text_cache[participant_id] = ""
260
- self.silence_counters[participant_id] = 0
261
- self.sentence_finalized[participant_id] = False
262
-
263
- # Convert current chunk to numpy array for processing
264
- current_chunk_array = self._bytes_to_audio_array(audio_data)
265
- if len(current_chunk_array) == 0:
266
- return self.candidate_text_cache.get(participant_id, "")
267
-
268
- # Normalize the audio chunk
269
- current_chunk_array = current_chunk_array.astype(np.float32)
270
- if np.max(np.abs(current_chunk_array)) > 0:
271
- current_chunk_array /= np.max(np.abs(current_chunk_array))
272
-
273
- # Get existing accumulated audio array
274
- existing_buffer = self.candidate_audio_buffers[participant_id]
275
- if len(existing_buffer) > 0:
276
- existing_array = self._bytes_to_audio_array(existing_buffer)
277
- if len(existing_array) > 0:
278
- combined_array = np.concatenate([existing_array, current_chunk_array])
279
- else:
280
- combined_array = current_chunk_array
281
- else:
282
- combined_array = current_chunk_array
283
-
284
- # Convert back to bytes for storage
285
- combined_bytes = self._audio_array_to_bytes(combined_array)
286
- self.candidate_audio_buffers[participant_id] = combined_bytes
287
-
288
- # Update silence counter based on voice activity
289
- if not has_voice_activity:
290
- self.silence_counters[participant_id] += 1
291
- else:
292
- self.silence_counters[participant_id] = 0
293
-
294
- # Check if we should finalize sentence due to prolonged silence
295
- should_finalize = (self.silence_counters[participant_id] >= self.silence_threshold and
296
- len(combined_array) > 0 and
297
- not self.sentence_finalized[participant_id])
298
-
299
- if should_finalize:
300
- return await self._finalize_candidate_sentence(
301
- language_code, participant_id, sentence_callback
302
- )
303
-
304
- # Always run transcription on the accumulated audio
305
- audio_duration_sec = len(combined_array) / 16000.0 # 16kHz sample rate
306
-
307
- if audio_duration_sec < 0.1: # Very short minimum
308
- if progress_callback:
309
- cached_text = self.candidate_text_cache.get(participant_id, "")
310
- await progress_callback(cached_text, False)
311
- return self.candidate_text_cache.get(participant_id, "")
312
-
313
- # Force finalization if buffer gets too long
314
- if audio_duration_sec > 15.0 and not self.sentence_finalized[participant_id]:
315
- return await self._finalize_candidate_sentence(
316
- language_code, participant_id, sentence_callback
317
- )
318
-
319
- # Run voice activity detection on the accumulated audio before transcription
320
- has_voice_in_buffer = self.has_meaningful_voice_activity(combined_bytes)
321
-
322
- if not has_voice_in_buffer:
323
- if progress_callback:
324
- cached_text = self.candidate_text_cache.get(participant_id, "")
325
- await progress_callback(cached_text, False)
326
- return self.candidate_text_cache.get(participant_id, "")
327
-
328
- # Run transcription
329
- await self.ensure_model_loaded(language_code)
330
-
331
- # Double-check voice activity before running expensive ASR
332
- has_voice_for_asr = self.has_voice_activity(combined_bytes)
333
- if not has_voice_for_asr:
334
- print(f"ONNX ASR: No voice activity detected, skipping ASR execution for {participant_id}")
335
- if progress_callback:
336
- cached_text = self.candidate_text_cache.get(participant_id, "")
337
- await progress_callback(cached_text, False)
338
- return self.candidate_text_cache.get(participant_id, "")
339
-
340
- if language_code not in self.asr_models:
341
- raise ValueError(f"ASR model not available for language: {language_code}")
342
-
343
- print(f"ONNX ASR: Running transcription for {participant_id} with {audio_duration_sec:.2f}s of audio")
344
-
345
- # Run ONNX inference (this service is ONNX-only)
346
- model_config = self.asr_config[language_code]
347
- if not model_config.get("use_onnx", False):
348
- raise ValueError(f"Language {language_code} is not configured for ONNX. This service only supports ONNX models.")
349
-
350
- # ONNX inference
351
- text = await self._run_onnx_inference(combined_array, language_code)
352
-
353
- # Filter out common ASR artifacts
354
- artifacts = [
355
- "thank you", "thanks", "bye", ".", ",", "?", "!",
356
- "um", "uh", "ah", "hmm", "mm", "mhm",
357
- "you", "the", "a", "an", "and", "but", "or",
358
- "music", "laughter", "applause", "[music]", "[laughter]",
359
- ]
360
-
361
- # Check if the result is likely an artifact
362
- is_artifact = (
363
- len(text) < 3 or
364
- text.lower() in artifacts or
365
- len(text.split()) == 1 and len(text) < 6
366
- )
367
-
368
- if is_artifact:
369
- text = self.candidate_text_cache.get(participant_id, "")
370
-
371
- # Cache the current candidate text
372
- self.candidate_text_cache[participant_id] = text
373
-
374
- # Force completion if we have reasonable text and some silence
375
- word_count = len(text.split()) if text else 0
376
- if (word_count >= 3 and self.silence_counters[participant_id] >= 2 and
377
- not self.sentence_finalized[participant_id]):
378
- return await self._finalize_candidate_sentence(
379
- language_code, participant_id, sentence_callback
380
- )
381
-
382
- # Always send progress update
383
- if progress_callback:
384
- await progress_callback(text, False)
385
-
386
- return text
387
-
388
- except Exception as e:
389
- print(f"ONNX TranscriptionService: Error processing audio chunk: {e}")
390
- import traceback
391
- traceback.print_exc()
392
- if progress_callback:
393
- cached_text = self.candidate_text_cache.get(participant_id, "")
394
- await progress_callback(cached_text, False)
395
- return self.candidate_text_cache.get(participant_id, "")
396
-
397
- async def _run_onnx_inference(self, audio_array: np.ndarray, language_code: str) -> str:
398
- """Run ONNX inference for speech recognition"""
399
- try:
400
- model = self.asr_models[language_code]
401
- processor = self.processors[language_code]
402
- model_config = self.asr_config[language_code]
403
-
404
- # Check if this is a Whisper model
405
- if model_config.get("model_type") == "whisper":
406
- # Whisper-specific processing using Optimum
407
- import torch
408
-
409
- # Process audio input for Whisper
410
- inputs = processor(audio_array, sampling_rate=16000, return_tensors="pt")
411
-
412
- # Generate transcription using the ORTModelForSpeechSeq2Seq
413
- predicted_ids = model.generate(inputs.input_features, max_length=448)
414
-
415
- # Decode the generated IDs
416
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
417
-
418
- return transcription[0].strip() if transcription else ""
419
- else:
420
- # Original wav2vec2-bert processing
421
- session = model
422
-
423
- # Preprocess audio
424
- inputs = processor(audio_array, sampling_rate=16000, return_tensors="np")
425
-
426
- # Get input names for ONNX session
427
- input_names = [inp.name for inp in session.get_inputs()]
428
-
429
- # Prepare inputs for ONNX
430
- onnx_inputs = {}
431
- for name in input_names:
432
- if name in inputs:
433
- onnx_inputs[name] = inputs[name]
434
- elif name == "input_values" and "input_features" in inputs:
435
- onnx_inputs[name] = inputs["input_features"]
436
- elif name == "attention_mask" and "attention_mask" in inputs:
437
- onnx_inputs[name] = inputs["attention_mask"]
438
-
439
- # Run ONNX inference
440
- outputs = session.run(None, onnx_inputs)
441
-
442
- # Post-process outputs (assuming CTC decoding)
443
- logits = outputs[0] # First output should be logits
444
-
445
- # Simple greedy CTC decoding
446
- predicted_ids = np.argmax(logits, axis=-1)
447
-
448
- # Decode using processor
449
- text = processor.batch_decode(predicted_ids)[0]
450
-
451
- return text.strip()
452
-
453
- except Exception as e:
454
- print(f"ONNX ASR: Inference error: {e}")
455
- import traceback
456
- traceback.print_exc()
457
- return ""
458
-
459
- async def _finalize_candidate_sentence(self, language_code: str, participant_id: str,
460
- sentence_callback: Optional[Callable] = None) -> str:
461
- """Finalize the current candidate sentence and clear buffers"""
462
- try:
463
- if self.sentence_finalized.get(participant_id, False):
464
- print(f"Sentence for participant {participant_id} already finalized, skipping duplicate")
465
- return self.candidate_text_cache.get(participant_id, "")
466
-
467
- final_text = self.candidate_text_cache.get(participant_id, "")
468
- final_audio_bytes = self.candidate_audio_buffers.get(participant_id, b'')
469
-
470
- if final_text and len(final_text.strip()) > 0:
471
- self.sentence_finalized[participant_id] = True
472
-
473
- if sentence_callback and len(final_audio_bytes) > 0:
474
- print(f"Finalizing sentence for participant {participant_id}: '{final_text}'")
475
- await sentence_callback(final_text, final_audio_bytes)
476
-
477
- # Clear buffers for next sentence
478
- self.candidate_audio_buffers[participant_id] = b''
479
- self.candidate_text_cache[participant_id] = ""
480
- self.silence_counters[participant_id] = 0
481
- self.sentence_finalized[participant_id] = False
482
-
483
- return final_text
484
-
485
- except Exception as e:
486
- print(f"Error finalizing sentence: {e}")
487
- import traceback
488
- traceback.print_exc()
489
- self.sentence_finalized[participant_id] = False
490
- return ""
491
-
492
- def has_voice_activity(self, audio_data: bytes, threshold: float = 0.0005) -> bool:
493
- """Enhanced VAD based on audio analysis"""
494
- try:
495
- audio_array = self._bytes_to_audio_array(audio_data)
496
- if len(audio_array) == 0:
497
- return False
498
-
499
- # Normalize audio
500
- audio_array = audio_array.astype(np.float32)
501
- if np.max(np.abs(audio_array)) > 0:
502
- audio_array /= np.max(np.abs(audio_array))
503
-
504
- # Calculate multiple features for better VAD
505
- rms = np.sqrt(np.mean(audio_array ** 2))
506
- peak = np.max(np.abs(audio_array))
507
- audio_std = np.std(audio_array)
508
- zero_crossing_rate = np.sum(np.diff(np.sign(audio_array)) != 0) / len(audio_array)
509
-
510
- # Voice activity detection
511
- has_voice_rms = rms > threshold
512
- has_voice_peak = peak > threshold * 3
513
- has_voice_variation = audio_std > threshold * 0.8
514
- has_voice_zcr = zero_crossing_rate > 0.008
515
-
516
- has_voice = has_voice_rms or (has_voice_peak and has_voice_variation) or has_voice_zcr
517
-
518
- return has_voice
519
-
520
- except Exception as e:
521
- print(f"Error in VAD: {e}")
522
- return True
523
-
524
- def has_meaningful_voice_activity(self, audio_data: bytes, threshold: float = 0.002) -> bool:
525
- """Stricter VAD check for pre-transcription filtering"""
526
- try:
527
- audio_array = self._bytes_to_audio_array(audio_data)
528
- if len(audio_array) == 0:
529
- return False
530
-
531
- # Normalize audio
532
- audio_array = audio_array.astype(np.float32)
533
- if np.max(np.abs(audio_array)) > 0:
534
- audio_array /= np.max(np.abs(audio_array))
535
-
536
- # Calculate features with higher thresholds
537
- rms = np.sqrt(np.mean(audio_array ** 2))
538
- peak = np.max(np.abs(audio_array))
539
- audio_std = np.std(audio_array)
540
- zero_crossing_rate = np.sum(np.diff(np.sign(audio_array)) != 0) / len(audio_array)
541
-
542
- # Higher thresholds for meaningful speech detection
543
- has_meaningful_voice = (
544
- rms > threshold and
545
- peak > threshold * 2 and
546
- audio_std > threshold * 0.5 and
547
- zero_crossing_rate > 0.015
548
- )
549
-
550
- return has_meaningful_voice
551
-
552
- except Exception as e:
553
- print(f"Error in meaningful VAD: {e}")
554
- return False
555
-
556
- async def force_complete_sentence(self, participant_id: str, language_code: str, sentence_callback: Optional[Callable] = None) -> str:
557
- """Force complete any pending sentence for a participant"""
558
- try:
559
- if self.sentence_finalized.get(participant_id, False):
560
- print(f"Force completion: Sentence for participant {participant_id} already finalized")
561
- return ""
562
-
563
- if participant_id in self.candidate_text_cache:
564
- cached_text = self.candidate_text_cache[participant_id]
565
-
566
- if cached_text and len(cached_text.strip()) > 0:
567
- result = await self._finalize_candidate_sentence(language_code, participant_id, sentence_callback)
568
- return result
569
-
570
- return ""
571
-
572
- except Exception as e:
573
- print(f"Error in force_complete_sentence: {e}")
574
- import traceback
575
- traceback.print_exc()
576
- return ""
577
-
578
- async def transcribe_audio(self, audio_data: bytes, language_code: str, callback: Optional[Callable] = None) -> str:
579
- """Transcribe audio data to text using ONNX models"""
580
- try:
581
- # Check for voice activity before running ASR
582
- has_voice = self.has_voice_activity(audio_data)
583
- if not has_voice:
584
- print(f"ONNX ASR: No voice activity detected, skipping transcription")
585
- return ""
586
-
587
- await self.ensure_model_loaded(language_code)
588
-
589
- if language_code not in self.asr_models:
590
- raise ValueError(f"ASR model not available for language: {language_code}")
591
-
592
- # Convert audio bytes to numpy array
593
- audio_array = self._bytes_to_audio_array(audio_data)
594
-
595
- print(f"ONNX ASR: Running transcription with {len(audio_array)/16000:.2f}s of audio")
596
-
597
- # Run ONNX inference (this service is ONNX-only)
598
- model_config = self.asr_config[language_code]
599
- if not model_config.get("use_onnx", False):
600
- raise ValueError(f"Language {language_code} is not configured for ONNX. This service only supports ONNX models.")
601
-
602
- # ONNX inference
603
- text = await self._run_onnx_inference(audio_array, language_code)
604
-
605
- if callback:
606
- await callback(text)
607
-
608
- return text
609
-
610
- except Exception as e:
611
- print(f"ONNX TranscriptionService: Transcription error: {e}")
612
- import traceback
613
- traceback.print_exc()
614
- return ""
615
-
616
- def _bytes_to_audio_array(self, audio_data: bytes) -> np.ndarray:
617
- """Convert audio bytes to numpy array"""
618
- try:
619
- # Try to decode as WAV
620
- try:
621
- audio_io = io.BytesIO(audio_data)
622
- with wave.open(audio_io, 'rb') as wav_file:
623
- frames = wav_file.readframes(-1)
624
- audio_array = np.frombuffer(frames, dtype=np.int16)
625
- # Convert to float32 and normalize
626
- audio_array = audio_array.astype(np.float32) / 32768.0
627
- return audio_array
628
- except Exception:
629
- pass
630
-
631
- # Fallback: assume raw float32 audio data
632
- try:
633
- audio_array = np.frombuffer(audio_data, dtype=np.float32)
634
- return audio_array
635
- except Exception:
636
- pass
637
-
638
- return np.array([], dtype=np.float32)
639
-
640
- except Exception as e:
641
- print(f"ONNX TranscriptionService: Audio conversion error: {e}")
642
- return np.array([], dtype=np.float32)
643
-
644
- def _audio_array_to_bytes(self, audio_array: np.ndarray) -> bytes:
645
- """Convert numpy audio array back to WAV bytes for storage"""
646
- try:
647
- if audio_array.dtype != np.float32:
648
- audio_array = audio_array.astype(np.float32)
649
-
650
- # Convert to 16-bit PCM for WAV storage
651
- audio_int16 = (audio_array * 32767).astype(np.int16)
652
-
653
- # Create WAV bytes
654
- wav_buffer = io.BytesIO()
655
- with wave.open(wav_buffer, 'wb') as wav_file:
656
- wav_file.setnchannels(1) # Mono
657
- wav_file.setsampwidth(2) # 16-bit
658
- wav_file.setframerate(16000) # 16kHz
659
- wav_file.writeframes(audio_int16.tobytes())
660
-
661
- return wav_buffer.getvalue()
662
-
663
- except Exception as e:
664
- print(f"Error converting audio array to bytes: {e}")
665
- return b''
666
-
667
- def clear_participant_buffers(self, participant_id: str):
668
- """Clear all buffers for a participant"""
669
- if participant_id in self.candidate_audio_buffers:
670
- del self.candidate_audio_buffers[participant_id]
671
- if participant_id in self.candidate_text_cache:
672
- del self.candidate_text_cache[participant_id]
673
- if participant_id in self.silence_counters:
674
- del self.silence_counters[participant_id]
675
- if participant_id in self.sentence_finalized:
676
- del self.sentence_finalized[participant_id]
677
-
678
- async def cleanup(self):
679
- """Cleanup resources"""
680
- self.asr_models.clear()
681
- self.processors.clear()
682
- self.model_cache.clear()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/transcription_service_onnx_optimized.py DELETED
@@ -1,251 +0,0 @@
1
- import asyncio
2
- import io
3
- import wave
4
- import numpy as np
5
- from typing import Dict, Optional, Callable
6
- from collections import OrderedDict
7
- import onnxruntime as ort
8
- from transformers import AutoProcessor, WhisperProcessor
9
- from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
10
- import os
11
- from app.models import LanguageCode
12
-
13
- class OptimizedONNXTranscriptionService:
14
- """
15
- Optimized ONNX Transcription Service that uses pre-converted ONNX models
16
- instead of performing runtime conversion from PyTorch models.
17
-
18
- Benefits:
19
- - Faster container startup (no conversion time)
20
- - Reduced memory usage during initialization
21
- - More predictable deployment times
22
- - Better resource utilization in production
23
- """
24
-
25
- def __init__(self):
26
- self.asr_models: Dict[str, any] = {}
27
- self.processors: Dict[str, any] = {}
28
- self.max_asr_models = 2 # Memory management - keep max 2 models loaded
29
- self.model_cache = OrderedDict() # LRU cache for models
30
-
31
- # GPU optimization
32
- self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if ort.get_available_providers().__contains__('CUDAExecutionProvider') else ['CPUExecutionProvider']
33
-
34
- # OPTIMIZED ONNX Model configurations - using pre-converted models
35
- self.asr_config = {
36
- # English: Use pre-converted ONNX model (no runtime conversion!)
37
- "eng": {
38
- "model_repo": "mutisya/whisper-medium-en-onnx", # Pre-converted ONNX model
39
- "model_type": "whisper",
40
- "use_onnx": True,
41
- "export": False # ⭐ KEY CHANGE: No runtime export needed!
42
- },
43
-
44
- # African languages: Already using ONNX models
45
- "swa": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-swh-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True},
46
- "kik": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-kik-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True},
47
- "kam": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-kam-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True},
48
- "mer": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-mer-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True},
49
- "luo": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-luo-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True},
50
- "som": {"model_repo": "mutisya/w2v-bert-2.0-asr-onnx-som-v25-37-1", "model_type": "wav2vec2-bert", "use_onnx": True}
51
- }
52
-
53
- self.preload_languages = ["eng"]
54
-
55
- # Enhanced audio buffering for VAD-based sentence detection
56
- self.candidate_audio_buffers: Dict[str, bytes] = {}
57
- self.candidate_text_cache: Dict[str, str] = {}
58
- self.silence_counters: Dict[str, int] = {}
59
- self.sentence_finalized: Dict[str, bool] = {}
60
-
61
- # VAD parameters
62
- self.silence_threshold = 2
63
- self.min_sentence_length = 0.03
64
-
65
- async def initialize(self):
66
- """Initialize ASR models for preloaded languages"""
67
- print(f"🚀 Optimized ONNX ASR: Initializing with providers: {self.providers}")
68
- print(f"📈 Performance Improvement: Using pre-converted ONNX models (no runtime conversion)")
69
-
70
- for lang_code in self.preload_languages:
71
- if lang_code in self.asr_config:
72
- try:
73
- start_time = asyncio.get_event_loop().time()
74
- await self.ensure_model_loaded(lang_code)
75
- end_time = asyncio.get_event_loop().time()
76
- print(f"⚡ Model loading time for {lang_code}: {end_time - start_time:.2f}s")
77
- except Exception as e:
78
- print(f"❌ Failed to load ASR model for {lang_code}: {e}")
79
-
80
- async def ensure_model_loaded(self, language_code: str):
81
- """Load ASR model for language if not already loaded with LRU cache"""
82
- if language_code in self.model_cache:
83
- # Move to end (most recently used)
84
- self.model_cache.move_to_end(language_code)
85
- return
86
-
87
- if language_code not in self.asr_config:
88
- raise ValueError(f"Language {language_code} not supported")
89
-
90
- model_config = self.asr_config[language_code]
91
-
92
- # Check if we need to evict old models
93
- while len(self.model_cache) >= self.max_asr_models:
94
- # Remove least recently used model
95
- old_lang, _ = self.model_cache.popitem(last=False)
96
- if old_lang in self.asr_models:
97
- del self.asr_models[old_lang]
98
- if old_lang in self.processors:
99
- del self.processors[old_lang]
100
- print(f"🗑️ ONNX ASR: Evicted model for {old_lang} (LRU cache)")
101
-
102
- try:
103
- if model_config.get("use_onnx", False):
104
- # Load ONNX model
105
- print(f"📥 ONNX ASR: Loading ONNX model for {language_code}")
106
-
107
- # Special handling for Whisper models
108
- if model_config.get("model_type") == "whisper":
109
- print(f"🎙️ ONNX ASR: Loading pre-converted Whisper ONNX model from {model_config['model_repo']}")
110
-
111
- # Load pre-converted Whisper ONNX model using Optimum
112
- load_kwargs = {
113
- # Note: No 'export' parameter needed since model is already in ONNX format
114
- # This is the key optimization - no runtime conversion!
115
- }
116
-
117
- # Add subfolder if specified (for models that store ONNX in subfolders)
118
- if "subfolder" in model_config:
119
- load_kwargs["subfolder"] = model_config["subfolder"]
120
-
121
- # ⭐ KEY OPTIMIZATION: No export flag needed for pre-converted models
122
- # The old code had: if model_config.get("export", False): load_kwargs["export"] = True
123
- # Now we skip this entirely since the model is already in ONNX format
124
-
125
- model = ORTModelForSpeechSeq2Seq.from_pretrained(
126
- model_config["model_repo"],
127
- **load_kwargs
128
- )
129
-
130
- # Load Whisper processor
131
- processor = WhisperProcessor.from_pretrained(model_config["model_repo"])
132
-
133
- # Configure for English transcription
134
- model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(
135
- language="en",
136
- task="transcribe"
137
- )
138
-
139
- self.asr_models[language_code] = model
140
- self.processors[language_code] = processor
141
-
142
- print(f"✅ ONNX ASR: Successfully loaded pre-converted Whisper ONNX model for {language_code}")
143
-
144
- else:
145
- # Original wav2vec2-bert model loading logic (unchanged)
146
- # Create ONNX session with optimizations
147
- session_options = ort.SessionOptions()
148
- session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
149
-
150
- # Enable parallel execution
151
- session_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
152
-
153
- model_path = model_config["model_repo"]
154
-
155
- try:
156
- # Try to load from HuggingFace directly
157
- from huggingface_hub import hf_hub_download
158
- model_file = hf_hub_download(repo_id=model_path, filename="model.onnx")
159
-
160
- # Create ONNX Runtime session
161
- session = ort.InferenceSession(
162
- model_file,
163
- session_options,
164
- providers=self.providers
165
- )
166
-
167
- # Load processor/tokenizer
168
- processor = AutoProcessor.from_pretrained(model_path)
169
-
170
- self.asr_models[language_code] = session
171
- self.processors[language_code] = processor
172
-
173
- print(f"✅ ONNX ASR: Successfully loaded {model_config['model_type']} ONNX model for {language_code}")
174
-
175
- except Exception as e:
176
- print(f"❌ Error loading ONNX model {model_path}: {e}")
177
- raise
178
-
179
- else:
180
- raise ValueError(f"Non-ONNX models not supported in optimized service")
181
-
182
- # Add to cache
183
- self.model_cache[language_code] = True
184
-
185
- except Exception as e:
186
- print(f"❌ Error loading model for {language_code}: {e}")
187
- raise
188
-
189
- # Rest of the methods remain the same as the original transcription service
190
- # (transcribe_audio, process_audio_chunk, etc.)
191
- # ... [Include all other methods from the original service]
192
-
193
- async def transcribe_audio(self, participant_id: str, audio_data: bytes, language_code: str = "eng") -> Optional[str]:
194
- """Transcribe audio using ONNX models"""
195
- try:
196
- await self.ensure_model_loaded(language_code)
197
-
198
- if language_code not in self.asr_models or language_code not in self.processors:
199
- raise ValueError(f"Model not loaded for language: {language_code}")
200
-
201
- model = self.asr_models[language_code]
202
- processor = self.processors[language_code]
203
-
204
- # Convert audio bytes to numpy array
205
- audio_io = io.BytesIO(audio_data)
206
- with wave.open(audio_io, 'rb') as wav_file:
207
- frames = wav_file.readframes(-1)
208
- sample_rate = wav_file.getframerate()
209
- audio_np = np.frombuffer(frames, dtype=np.int16).astype(np.float32) / 32768.0
210
-
211
- # Get model configuration
212
- model_config = self.asr_config[language_code]
213
-
214
- if model_config.get("model_type") == "whisper":
215
- # Process with Whisper ONNX model
216
- inputs = processor(audio_np, sampling_rate=sample_rate, return_tensors="pt")
217
-
218
- with torch.no_grad():
219
- predicted_ids = model.generate(**inputs)
220
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
221
-
222
- return transcription.strip()
223
-
224
- else:
225
- # Process with wav2vec2-bert ONNX model
226
- inputs = processor(audio_np, sampling_rate=sample_rate, return_tensors="np")
227
-
228
- # Run ONNX inference
229
- ort_inputs = {model.get_inputs()[0].name: inputs.input_values}
230
- ort_outputs = model.run(None, ort_inputs)
231
-
232
- # Decode results
233
- predicted_ids = np.argmax(ort_outputs[0], axis=-1)
234
- transcription = processor.decode(predicted_ids[0])
235
-
236
- return transcription.strip()
237
-
238
- except Exception as e:
239
- print(f"❌ Transcription error for {participant_id}: {e}")
240
- return None
241
-
242
- def get_performance_stats(self) -> Dict[str, any]:
243
- """Get performance statistics for monitoring"""
244
- return {
245
- "loaded_models": list(self.model_cache.keys()),
246
- "cache_size": len(self.model_cache),
247
- "max_cache_size": self.max_asr_models,
248
- "providers": self.providers,
249
- "optimization_enabled": True,
250
- "runtime_conversion": False # Key metric: no runtime conversion
251
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/translation_service.py DELETED
@@ -1,151 +0,0 @@
1
- import asyncio
2
- from typing import Dict, Optional
3
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
4
- import torch
5
- import nltk
6
- from app.models import LanguageCode
7
- from app.services.quantization_utils import apply_dynamic_int8_quantization, get_quantization_stats
8
-
9
- # FLORES-200 language codes mapping
10
- FLORES_CODES = {
11
- "English": "eng_Latn",
12
- "eng": "eng_Latn",
13
- "Swahili": "swh_Latn",
14
- "swa": "swh_Latn",
15
- "Kikuyu": "kik_Latn",
16
- "kik": "kik_Latn",
17
- "Kamba": "kam_Latn",
18
- "kam": "kam_Latn",
19
- "Kimeru": "mer_Latn",
20
- "mer": "mer_Latn",
21
- "Luo": "luo_Latn",
22
- "luo": "luo_Latn",
23
- "Somali": "som_Latn",
24
- "som": "som_Latn",
25
- }
26
-
27
- class TranslationService:
28
- def __init__(self, enable_quantization: bool = True):
29
- self.translation_pipeline = None
30
- self.device = 0 if torch.cuda.is_available() else -1
31
- self.model_path = "mutisya/nllb_600m-en-kik-kam-luo-mer-som-swh-drL-24_5-filtered-v24_28_4"
32
- self.enable_quantization = enable_quantization
33
-
34
- async def initialize(self):
35
- """Initialize translation model"""
36
- try:
37
- # Download NLTK data with better error handling
38
- try:
39
- nltk.download("punkt", quiet=True)
40
- nltk.download('punkt_tab', quiet=True)
41
- except Exception as nltk_error:
42
- print(f"Warning: NLTK data download failed: {nltk_error}")
43
- # Continue anyway, sentence tokenization might still work
44
-
45
- # Load translation model with explicit model kwargs for newer transformers
46
- print(f"Loading translation model: {self.model_path}")
47
- model = AutoModelForSeq2SeqLM.from_pretrained(
48
- self.model_path,
49
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
50
- )
51
- tokenizer = AutoTokenizer.from_pretrained(self.model_path)
52
-
53
- # Apply quantization if enabled
54
- if self.enable_quantization:
55
- try:
56
- print("Applying INT8 quantization to translation model...")
57
- model = apply_dynamic_int8_quantization(model, "translation")
58
- stats = get_quantization_stats(model)
59
- print(f"✓ Translation model quantized: {stats['quantized_layers']}/{stats['total_layers']} layers, {stats['size_mb']:.2f} MB")
60
- except Exception as e:
61
- print(f"Warning: Could not quantize translation model: {e}")
62
- print(f"Continuing with unquantized model")
63
-
64
- self.translation_pipeline = pipeline(
65
- 'translation',
66
- model=model,
67
- tokenizer=tokenizer,
68
- device=self.device,
69
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
70
- )
71
-
72
- except Exception as e:
73
- print(f"Failed to initialize translation service: {e}")
74
- raise
75
-
76
- async def translate_text(self, text: str, source_lang: str, target_lang: str) -> str:
77
- """Translate text from source language to target language"""
78
- print(f"=== TRANSLATION REQUEST ===")
79
- print(f"Text: '{text}'")
80
- print(f"Source: {source_lang}")
81
- print(f"Target: {target_lang}")
82
-
83
- if not self.translation_pipeline:
84
- print("TRANSLATION ERROR: Translation service not initialized")
85
- raise RuntimeError("Translation service not initialized")
86
-
87
- if not text or not text.strip():
88
- print("TRANSLATION ERROR: Empty text provided")
89
- return ""
90
-
91
- try:
92
- # Get FLORES codes
93
- src_code = FLORES_CODES.get(source_lang, "eng_Latn")
94
- tgt_code = FLORES_CODES.get(target_lang, "eng_Latn")
95
-
96
- print(f"FLORES codes: {source_lang} -> {src_code}, {target_lang} -> {tgt_code}")
97
-
98
- # Skip translation if same language
99
- if src_code == tgt_code:
100
- print("TRANSLATION SKIPPED: Same source and target language")
101
- return text
102
-
103
- # Tokenize into sentences for better translation
104
- sentences = nltk.sent_tokenize(text)
105
- translated_sentences = []
106
-
107
- print(f"Translating {len(sentences)} sentences...")
108
-
109
- for i, sentence in enumerate(sentences):
110
- if sentence.strip():
111
- print(f"Translating sentence {i+1}: '{sentence}'")
112
-
113
- result = self.translation_pipeline(
114
- sentence,
115
- src_lang=src_code,
116
- tgt_lang=tgt_code
117
- )
118
-
119
- translated = result[0]['translation_text']
120
- print(f"Translation result: '{translated}'")
121
-
122
- # Preserve punctuation and capitalization
123
- if sentence.strip().endswith(".") and not translated.strip().endswith("."):
124
- translated += "."
125
-
126
- if sentence.strip()[0].isupper() and translated.strip():
127
- translated = translated[0].upper() + translated[1:]
128
-
129
- translated_sentences.append(translated)
130
-
131
- final_translation = " ".join(translated_sentences)
132
-
133
- # Preserve paragraph breaks
134
- if text.endswith(".\n\n"):
135
- final_translation += ".\n\n"
136
-
137
- print(f"FINAL TRANSLATION: '{final_translation}'")
138
- print(f"=== TRANSLATION COMPLETE ===")
139
-
140
- return final_translation
141
-
142
- except Exception as e:
143
- print(f"TRANSLATION ERROR: {e}")
144
- import traceback
145
- traceback.print_exc()
146
- return text # Return original text if translation fails
147
-
148
- async def cleanup(self):
149
- """Cleanup resources"""
150
- self.translation_pipeline = None
151
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/translation_service_onnx.py DELETED
@@ -1,268 +0,0 @@
1
- import asyncio
2
- from typing import Dict, Optional
3
- from transformers import AutoTokenizer, pipeline
4
- from optimum.onnxruntime import ORTModelForSeq2SeqLM
5
- import nltk
6
- from app.models import LanguageCode
7
-
8
- # FLORES-200 language codes mapping
9
- FLORES_CODES = {
10
- "English": "eng_Latn",
11
- "eng": "eng_Latn",
12
- "Swahili": "swh_Latn",
13
- "swa": "swh_Latn",
14
- "Kikuyu": "kik_Latn",
15
- "kik": "kik_Latn",
16
- "Kamba": "kam_Latn",
17
- "kam": "kam_Latn",
18
- "Kimeru": "mer_Latn",
19
- "mer": "mer_Latn",
20
- "Luo": "luo_Latn",
21
- "luo": "luo_Latn",
22
- "Somali": "som_Latn",
23
- "som": "som_Latn",
24
- }
25
-
26
- class ONNXTranslationService:
27
- def __init__(self):
28
- self.model = None
29
- self.tokenizer = None
30
- self.translation_pipeline = None
31
-
32
- # Use ONNX optimized NLLB model (FP32 format with separate encoder/decoder)
33
- self.model_repo = "mutisya/nllb-translation-onnx-v25-37-1"
34
-
35
- async def initialize(self):
36
- """Initialize ONNX translation model using optimum.onnxruntime"""
37
- try:
38
- print("ONNX Translation: Initializing translation service with ONNX Runtime...")
39
- print(f"ONNX Translation: Loading model from {self.model_repo}")
40
-
41
- # Check available providers for GPU detection
42
- import onnxruntime as ort
43
- available_providers = ort.get_available_providers()
44
- print(f"ONNX Translation: Available providers: {available_providers}")
45
-
46
- # Download NLTK data with better error handling
47
- try:
48
- nltk.download("punkt", quiet=True)
49
- nltk.download('punkt_tab', quiet=True)
50
- except Exception as nltk_error:
51
- print(f"Warning: NLTK data download failed: {nltk_error}")
52
-
53
- # Get authentication token for private repo
54
- import os
55
- auth_token = os.getenv('HUGGING_FACE_HUB_TOKEN') or os.getenv('HF_TOKEN')
56
-
57
- # Configure providers list for optimal performance
58
- print("ONNX Translation: Configuring execution providers...")
59
- if 'CUDAExecutionProvider' in available_providers:
60
- # Use both CUDA and CPU providers to eliminate assignment warnings
61
- providers_list = ['CUDAExecutionProvider', 'CPUExecutionProvider']
62
- primary_provider = 'CUDAExecutionProvider'
63
- print(f"ONNX Translation: Using providers: {providers_list} (primary: {primary_provider})")
64
- else:
65
- providers_list = ['CPUExecutionProvider']
66
- primary_provider = 'CPUExecutionProvider'
67
- print(f"ONNX Translation: Using CPU-only providers: {providers_list}")
68
-
69
- # Load ONNX model using optimum (handles separate encoder/decoder files)
70
- # Configure session options for optimal CUDA performance
71
- import onnxruntime as ort
72
- session_options = ort.SessionOptions()
73
- session_options.log_severity_level = 1 # WARNING level for detailed logs
74
- session_options.logid = "ONNX_Translation"
75
-
76
- # Enable all graph optimizations to reduce memcpy operations
77
- session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
78
-
79
- # Optimize threading for better GPU utilization
80
- session_options.inter_op_num_threads = 1 # Reduce CPU thread contention
81
- session_options.intra_op_num_threads = 1 # Focus on GPU execution
82
-
83
- # Note: enable_cuda_graph not available in this ONNX Runtime version
84
-
85
- # Configure provider options with performance optimizations for CUDA
86
- provider_options = []
87
- if primary_provider == 'CUDAExecutionProvider':
88
- cuda_options = {
89
- 'device_id': 0,
90
- 'arena_extend_strategy': 'kNextPowerOfTwo',
91
- 'gpu_mem_limit': int(0.6 * 1024 * 1024 * 1024), # 60% of GPU memory for translation
92
- 'cudnn_conv_algo_search': 'EXHAUSTIVE',
93
- 'cudnn_conv_use_max_workspace': '1', # Enable max workspace for fp16 tensor cores
94
- 'do_copy_in_default_stream': True,
95
- 'enable_skip_layer_norm_strict_mode': False, # Better performance for transformers
96
- 'prefer_nhwc': True, # Optimize data layout for GPU
97
- }
98
- # Configure providers with options
99
- provider_options = [
100
- ('CUDAExecutionProvider', cuda_options),
101
- ('CPUExecutionProvider', {})
102
- ]
103
-
104
- # Try with optimized provider configuration and session options
105
- try:
106
- print("ONNX Translation: Attempting optimized provider configuration...")
107
- self.model = ORTModelForSeq2SeqLM.from_pretrained(
108
- self.model_repo,
109
- token=auth_token,
110
- providers=provider_options if provider_options else providers_list, # Use provider options or list
111
- session_options=session_options, # Add session options
112
- )
113
- print(f"ONNX Translation: Model loaded successfully with providers: {providers_list}")
114
-
115
- # Check what providers the model is actually using
116
- if hasattr(self.model, 'providers'):
117
- print(f"ONNX Translation: Model is using providers: {self.model.providers}")
118
- if hasattr(self.model, 'device'):
119
- print(f"ONNX Translation: Model device: {self.model.device}")
120
-
121
- except Exception as e1:
122
- print(f"ONNX Translation: Optimized provider approach failed: {e1}")
123
- print("ONNX Translation: Falling back to simple provider list...")
124
-
125
- # Fallback: Try with simple provider list (no options)
126
- try:
127
- self.model = ORTModelForSeq2SeqLM.from_pretrained(
128
- self.model_repo,
129
- token=auth_token,
130
- providers=providers_list, # Simple provider list
131
- session_options=session_options,
132
- )
133
- print(f"ONNX Translation: Model loaded successfully with simple providers: {providers_list}")
134
-
135
- # Check what the model is actually using
136
- if hasattr(self.model, 'providers'):
137
- print(f"ONNX Translation: Model is using providers: {self.model.providers}")
138
- if hasattr(self.model, 'device'):
139
- print(f"ONNX Translation: Model device: {self.model.device}")
140
-
141
- except Exception as e2:
142
- print(f"ONNX Translation: Simple provider approach failed: {e2}")
143
- print("ONNX Translation: Falling back to auto-detect...")
144
-
145
- # Final fallback: Let model auto-detect
146
- self.model = ORTModelForSeq2SeqLM.from_pretrained(
147
- self.model_repo,
148
- token=auth_token
149
- # Not passing provider, letting it auto-detect based on device
150
- )
151
- print(f"ONNX Translation: Model loaded successfully with auto-detection")
152
-
153
- # Check what the model is actually using
154
- if hasattr(self.model, 'providers'):
155
- print(f"ONNX Translation: Model auto-selected providers: {self.model.providers}")
156
- if hasattr(self.model, 'device'):
157
- print(f"ONNX Translation: Model device: {self.model.device}")
158
-
159
- # Load tokenizer
160
- self.tokenizer = AutoTokenizer.from_pretrained(
161
- self.model_repo,
162
- token=auth_token
163
- )
164
-
165
- # Create translation pipeline
166
- # For ONNX models, we should specify device to ensure pipeline uses GPU
167
- # Use the same provider detection as the model to ensure consistency
168
- device = 0 if primary_provider == 'CUDAExecutionProvider' else -1
169
- print(f"ONNX Translation: Setting pipeline device to: {device} ({'GPU' if device >= 0 else 'CPU'})")
170
- print(f"ONNX Translation: Pipeline will use device based on primary provider: {primary_provider}")
171
-
172
- self.translation_pipeline = pipeline(
173
- "translation",
174
- model=self.model,
175
- tokenizer=self.tokenizer,
176
- device=device
177
- )
178
-
179
- print("ONNX Translation: Successfully initialized ONNX translation model")
180
-
181
- except Exception as e:
182
- print(f"Failed to initialize ONNX translation service: {e}")
183
- print("ONNX translation model is not available. Please ensure the model repository exists and contains the required ONNX files.")
184
- import traceback
185
- traceback.print_exc()
186
- raise RuntimeError(f"ONNX translation model unavailable at {self.model_repo}: {e}")
187
-
188
- async def translate_text(self, text: str, source_lang: str, target_lang: str) -> str:
189
- """Translate text from source language to target language using ONNX"""
190
- print(f"=== ONNX TRANSLATION REQUEST ===")
191
- print(f"Text: '{text}'")
192
- print(f"Source: {source_lang}")
193
- print(f"Target: {target_lang}")
194
-
195
- if not self.translation_pipeline:
196
- print("ONNX TRANSLATION ERROR: Translation service not initialized")
197
- raise RuntimeError("ONNX Translation service not initialized")
198
-
199
- if not text or not text.strip():
200
- print("ONNX TRANSLATION ERROR: Empty text provided")
201
- return ""
202
-
203
- try:
204
- # Get FLORES codes
205
- src_code = FLORES_CODES.get(source_lang, "eng_Latn")
206
- tgt_code = FLORES_CODES.get(target_lang, "eng_Latn")
207
-
208
- print(f"FLORES codes: {source_lang} -> {src_code}, {target_lang} -> {tgt_code}")
209
-
210
- # Skip translation if same language
211
- if src_code == tgt_code:
212
- print("ONNX TRANSLATION SKIPPED: Same source and target language")
213
- return text
214
-
215
- # Tokenize into sentences for better translation
216
- sentences = nltk.sent_tokenize(text)
217
- translated_sentences = []
218
-
219
- print(f"Translating {len(sentences)} sentences with ONNX...")
220
-
221
- for i, sentence in enumerate(sentences):
222
- if sentence.strip():
223
- print(f"Translating sentence {i+1}: '{sentence}'")
224
-
225
- # Use the pipeline for translation
226
- result = self.translation_pipeline(
227
- sentence.strip(),
228
- src_lang=src_code,
229
- tgt_lang=tgt_code,
230
- max_length=512
231
- )
232
-
233
- translated = result[0]['translation_text']
234
- print(f"ONNX Translation result: '{translated}'")
235
-
236
- # Preserve punctuation and capitalization
237
- if sentence.strip().endswith(".") and not translated.strip().endswith("."):
238
- translated += "."
239
-
240
- if sentence.strip() and sentence.strip()[0].isupper() and translated.strip():
241
- translated = translated[0].upper() + translated[1:]
242
-
243
- translated_sentences.append(translated)
244
-
245
- final_translation = " ".join(translated_sentences)
246
-
247
- # Preserve paragraph breaks
248
- if text.endswith(".\n\n"):
249
- final_translation += ".\n\n"
250
-
251
- print(f"ONNX FINAL TRANSLATION: '{final_translation}'")
252
- print(f"=== ONNX TRANSLATION COMPLETE ===")
253
-
254
- return final_translation
255
-
256
- except Exception as e:
257
- print(f"ONNX TRANSLATION ERROR: {e}")
258
- import traceback
259
- traceback.print_exc()
260
- raise RuntimeError(f"Translation failed: {e}")
261
-
262
-
263
- async def cleanup(self):
264
- """Cleanup resources"""
265
- self.model = None
266
- self.tokenizer = None
267
- self.translation_pipeline = None
268
- print("ONNX Translation: Translation service cleaned up")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/tts_service.py DELETED
@@ -1,541 +0,0 @@
1
- import asyncio
2
- import io
3
- import wave
4
- import numpy as np
5
- import subprocess
6
- from typing import Dict, Optional
7
- from transformers import pipeline
8
- import torch
9
- import os
10
- from app.services.quantization_utils import apply_dynamic_int8_quantization, get_quantization_stats
11
-
12
- class TTSService:
13
- def __init__(self, enable_quantization: bool = True):
14
- self.tts_pipelines: Dict[str, any] = {}
15
- self.device = 0 if torch.cuda.is_available() else -1
16
- self.enable_quantization = enable_quantization
17
-
18
- # Check if espeak is available
19
- self.espeak_available = self._check_espeak_availability()
20
-
21
- # TTS model configurations from your original code
22
- self.tts_config = {
23
- "kik": {"model_repo": "mutisya/vits_kik_drL_24_5-v24_27_1_f", "model_type": "vits"},
24
- "luo": {"model_repo": "mutisya/vits_luo_drL_24_5-v24_27_1_f", "model_type": "vits"},
25
- "kam": {"model_repo": "mutisya/vits_kam_drL_24_5-v24_27_1_f", "model_type": "vits"},
26
- "mer": {"model_repo": "mutisya/vits_mer_drL_24_5-v24_27_1_f", "model_type": "vits"},
27
- "som": {"model_repo": "mutisya/vits_som_drL_24_5-v24_27_1_m", "model_type": "vits"},
28
- "swa": {"model_repo": "mutisya/vits_swh_biblica-v24_27_1_m", "model_type": "vits"},
29
- "eng": {"model_repo": "kakao-enterprise/vits-ljs", "model_type": "vits"},
30
- }
31
-
32
- # Alternative TTS models that don't require espeak (fallback)
33
- self.fallback_tts_config = {
34
- "eng": {"model_repo": "microsoft/speecht5_tts", "model_type": "speecht5"},
35
- "swa": {"model_repo": "facebook/mms-tts-swh", "model_type": "mms"},
36
- "som": {"model_repo": "facebook/mms-tts-som", "model_type": "mms"},
37
- }
38
-
39
- self.preload_languages = ["kik", "swa"]
40
- self.background_loading_task = None
41
- self.models_loading_status = {}
42
-
43
- def _check_espeak_availability(self) -> bool:
44
- """Check if espeak is available on the system"""
45
- try:
46
- result = subprocess.run(['espeak', '--version'],
47
- capture_output=True, text=True, timeout=5)
48
- if result.returncode == 0:
49
- print("TTS: espeak is available")
50
- return True
51
- else:
52
- print("TTS: espeak command failed")
53
- return False
54
- except (subprocess.TimeoutExpired, FileNotFoundError, Exception) as e:
55
- print(f"TTS: espeak not available: {e}")
56
- return False
57
-
58
- async def initialize(self):
59
- """Initialize TTS models for preloaded languages"""
60
- print("TTS: Initializing TTS service...")
61
- print(f"TTS: espeak available: {self.espeak_available}")
62
-
63
- for lang_code in self.preload_languages:
64
- await self.ensure_model_loaded(lang_code)
65
-
66
- def _load_and_quantize_tts_pipeline(self, lang_code: str, model_repo: str, model_type: str = "vits"):
67
- """Load TTS pipeline and optionally apply INT8 quantization"""
68
- print(f"TTS: Loading model for {lang_code}: {model_repo}")
69
-
70
- pipeline_obj = pipeline(
71
- "text-to-speech",
72
- model=model_repo,
73
- device=self.device
74
- )
75
-
76
- # Apply quantization if enabled
77
- if self.enable_quantization:
78
- try:
79
- # Get the underlying model from the pipeline
80
- model = pipeline_obj.model
81
-
82
- print(f"TTS: Applying INT8 quantization to {lang_code} model...")
83
- quantized_model = apply_dynamic_int8_quantization(model, model_type)
84
-
85
- # Replace the model in the pipeline
86
- pipeline_obj.model = quantized_model
87
-
88
- # Print quantization stats
89
- stats = get_quantization_stats(quantized_model)
90
- print(f"✓ TTS {lang_code} model quantized: {stats['quantized_layers']}/{stats['total_layers']} layers, {stats['size_mb']:.2f} MB")
91
-
92
- except Exception as e:
93
- print(f"TTS: Warning - Could not quantize {lang_code} model: {e}")
94
- print(f"TTS: Continuing with unquantized model")
95
-
96
- return pipeline_obj
97
-
98
-
99
- async def ensure_model_loaded(self, language_code: str):
100
- """Load TTS model for language if not already loaded"""
101
- if language_code in self.tts_pipelines:
102
- return
103
-
104
- # First try to load primary model if espeak is available
105
- if self.espeak_available and language_code in self.tts_config:
106
- try:
107
- model_config = self.tts_config[language_code]
108
- pipeline_obj = self._load_and_quantize_tts_pipeline(
109
- language_code,
110
- model_config["model_repo"],
111
- model_config.get("model_type", "vits")
112
- )
113
- self.tts_pipelines[language_code] = pipeline_obj
114
- print(f"TTS: Loaded primary TTS model for {language_code}")
115
- return
116
- except Exception as e:
117
- print(f"TTS: Failed to load primary TTS model for {language_code}: {e}")
118
- # Continue to try fallback models
119
-
120
- # Try fallback models if primary failed or espeak not available
121
- if language_code in self.fallback_tts_config:
122
- try:
123
- model_config = self.fallback_tts_config[language_code]
124
-
125
- if model_config["model_type"] == "speecht5":
126
- # Special handling for SpeechT5
127
- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
128
- import torch
129
-
130
- processor = SpeechT5Processor.from_pretrained(model_config["model_repo"])
131
- model = SpeechT5ForTextToSpeech.from_pretrained(model_config["model_repo"])
132
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
133
-
134
- # Create a custom pipeline-like object
135
- class SpeechT5Pipeline:
136
- def __init__(self, processor, model, vocoder):
137
- self.processor = processor
138
- self.model = model
139
- self.vocoder = vocoder
140
-
141
- def __call__(self, text):
142
- inputs = self.processor(text=text, return_tensors="pt")
143
- # Use default speaker embeddings
144
- import datasets
145
- embeddings_dataset = datasets.load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
146
- speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
147
-
148
- speech = self.model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=self.vocoder)
149
-
150
- return {
151
- "audio": speech.numpy(),
152
- "sampling_rate": 16000
153
- }
154
-
155
- pipeline_obj = SpeechT5Pipeline(processor, model, vocoder)
156
- else:
157
- # Standard pipeline for MMS models
158
- pipeline_obj = pipeline(
159
- "text-to-speech",
160
- model=model_config["model_repo"],
161
- device=self.device
162
- )
163
-
164
- self.tts_pipelines[language_code] = pipeline_obj
165
- print(f"TTS: Loaded fallback TTS model for {language_code}")
166
- return
167
-
168
- except Exception as e:
169
- print(f"TTS: Failed to load fallback TTS model for {language_code}: {e}")
170
-
171
- print(f"TTS: No TTS model available for language: {language_code}")
172
-
173
- async def generate_speech(self, text: str, language_code: str, output_format: str = "webm") -> Optional[bytes]:
174
- """Generate speech audio from text
175
-
176
- Args:
177
- text: Text to convert to speech
178
- language_code: Language code for TTS model
179
- output_format: Output format - "webm" (default, web-compatible) or "wav" (Android-compatible)
180
-
181
- Returns:
182
- Audio bytes in the requested format, or None if generation fails
183
- """
184
- try:
185
- print(f"=== TTS GENERATION REQUEST ===")
186
- print(f"Text: '{text}'")
187
- print(f"Language: {language_code}")
188
- print(f"Output format: {output_format}")
189
-
190
- # Input validation: Check for invalid or problematic text
191
- if not text or not text.strip():
192
- print("TTS: Empty or whitespace-only text, skipping TTS generation")
193
- return None
194
-
195
- # Check for very short text that might cause issues
196
- clean_text = text.strip()
197
- if len(clean_text) <= 2 and clean_text in [".", ",", "!", "?", ":", ";", "-"]:
198
- print(f"TTS: Text '{clean_text}' is too short or punctuation-only, skipping TTS generation")
199
- return None
200
-
201
- # Check for minimum meaningful length
202
- if len(clean_text.replace(" ", "").replace(".", "").replace(",", "")) < 2:
203
- print(f"TTS: Text '{clean_text}' has insufficient content for TTS, skipping")
204
- return None
205
-
206
- print(f"TTS pipelines available: {list(self.tts_pipelines.keys())}")
207
- print(f"TTS config available: {list(self.tts_config.keys())}")
208
- print(f"Fallback config available: {list(self.fallback_tts_config.keys())}")
209
-
210
- # Check if the language is supported
211
- if language_code not in self.tts_config and language_code not in self.fallback_tts_config:
212
- print(f"TTS: Language {language_code} not configured for TTS")
213
- return None
214
-
215
- await self.ensure_model_loaded(language_code)
216
-
217
- if language_code not in self.tts_pipelines:
218
- print(f"TTS: TTS model not available for language: {language_code}")
219
- return None
220
-
221
- if not text or not text.strip():
222
- print("TTS: Empty text provided")
223
- return None
224
-
225
- print(f"TTS: Generating speech for '{text}' in {language_code}")
226
-
227
- # Generate speech
228
- pipeline_obj = self.tts_pipelines[language_code]
229
- result = pipeline_obj(text)
230
-
231
- audio_array = result["audio"]
232
- sample_rate = result.get("sampling_rate", 22050)
233
-
234
- print(f"TTS: Generated audio array of length {len(audio_array)} at {sample_rate}Hz")
235
-
236
- # Validate audio array
237
- if len(audio_array) == 0:
238
- print("TTS: Warning - Generated audio array is empty")
239
- return None
240
-
241
- # Check for potential issues with audio data
242
- audio_min = np.min(audio_array)
243
- audio_max = np.max(audio_array)
244
- audio_rms = np.sqrt(np.mean(audio_array**2))
245
- print(f"TTS: Audio statistics - Min: {audio_min:.4f}, Max: {audio_max:.4f}, RMS: {audio_rms:.4f}")
246
-
247
- # Check if audio might be silent or corrupted
248
- if audio_rms < 0.001:
249
- print("TTS: Warning - Audio appears to be very quiet or silent")
250
- if audio_max > 1.0 or audio_min < -1.0:
251
- print("TTS: Warning - Audio values outside expected range [-1, 1]")
252
- # Clip to valid range
253
- audio_array = np.clip(audio_array, -1.0, 1.0)
254
- print("TTS: Clipped audio to valid range")
255
-
256
- # Convert to WAV bytes with appropriate sample rate
257
- if output_format == "wav":
258
- # For Android: use 16kHz sample rate
259
- target_sample_rate = 16000
260
- wav_bytes = self._convert_to_wav_bytes(audio_array, sample_rate)
261
- print(f"TTS: Converted to WAV: {len(wav_bytes)} bytes")
262
-
263
- # Convert sample rate to 16kHz if needed for Android compatibility
264
- if sample_rate != target_sample_rate:
265
- print(f"TTS: Converting sample rate from {sample_rate}Hz to {target_sample_rate}Hz for Android compatibility")
266
- wav_bytes = await self._resample_wav_to_16khz(wav_bytes, sample_rate)
267
- print(f"TTS: Resampled WAV: {len(wav_bytes)} bytes")
268
-
269
- print(f"TTS: Generated {len(wav_bytes)} bytes of WAV audio for '{text}'")
270
- print(f"=== TTS GENERATION COMPLETE ===")
271
-
272
- return wav_bytes
273
- else:
274
- # For web: use original sample rate and convert to WebM
275
- wav_bytes = self._convert_to_wav_bytes(audio_array, sample_rate)
276
- print(f"TTS: Converted to WAV: {len(wav_bytes)} bytes")
277
-
278
- # Convert to WebM format for web compatibility
279
- webm_bytes = await self._convert_to_webm(wav_bytes)
280
-
281
- print(f"TTS: Generated {len(webm_bytes)} bytes of WebM audio for '{text}'")
282
- print(f"=== TTS GENERATION COMPLETE ===")
283
-
284
- return webm_bytes
285
-
286
- except Exception as e:
287
- print(f"TTS: TTS generation error: {e}")
288
- import traceback
289
- traceback.print_exc()
290
- return None
291
-
292
- async def generate_speech_dual_format(self, text: str, language_code: str) -> tuple[Optional[bytes], Optional[bytes]]:
293
- """Generate speech audio in both WebM and WAV formats
294
-
295
- Args:
296
- text: Text to convert to speech
297
- language_code: Language code for TTS model
298
-
299
- Returns:
300
- Tuple of (webm_bytes, wav_bytes), either can be None if generation fails
301
- """
302
- try:
303
- print(f"=== TTS DUAL FORMAT GENERATION REQUEST ===")
304
- print(f"Text: '{text}'")
305
- print(f"Language: {language_code}")
306
-
307
- # Input validation: Check for invalid or problematic text
308
- if not text or not text.strip():
309
- print("TTS: Empty or whitespace-only text, skipping TTS generation")
310
- return None, None
311
-
312
- # Check for very short text that might cause issues
313
- clean_text = text.strip()
314
- if len(clean_text) <= 2 and clean_text in [".", ",", "!", "?", ":", ";", "-"]:
315
- print(f"TTS: Text '{clean_text}' is too short or punctuation-only, skipping TTS generation")
316
- return None, None
317
-
318
- # Check for minimum meaningful length
319
- if len(clean_text.replace(" ", "").replace(".", "").replace(",", "")) < 2:
320
- print(f"TTS: Text '{clean_text}' has insufficient content for TTS, skipping")
321
- return None, None
322
-
323
- # Check if the language is supported
324
- if language_code not in self.tts_config and language_code not in self.fallback_tts_config:
325
- print(f"TTS: Language {language_code} not configured for TTS")
326
- return None, None
327
-
328
- await self.ensure_model_loaded(language_code)
329
-
330
- if language_code not in self.tts_pipelines:
331
- print(f"TTS: TTS model not available for language: {language_code}")
332
- return None, None
333
-
334
- print(f"TTS: Generating speech for '{text}' in {language_code}")
335
-
336
- # Generate speech once
337
- pipeline_obj = self.tts_pipelines[language_code]
338
- result = pipeline_obj(text)
339
-
340
- audio_array = result["audio"]
341
- sample_rate = result.get("sampling_rate", 22050)
342
-
343
- print(f"TTS: Generated audio array of length {len(audio_array)} at {sample_rate}Hz")
344
-
345
- # Validate audio array
346
- if len(audio_array) == 0:
347
- print("TTS: Warning - Generated audio array is empty")
348
- return None, None
349
-
350
- # Check for potential issues with audio data
351
- audio_min = np.min(audio_array)
352
- audio_max = np.max(audio_array)
353
- audio_rms = np.sqrt(np.mean(audio_array**2))
354
- print(f"TTS: Audio statistics - Min: {audio_min:.4f}, Max: {audio_max:.4f}, RMS: {audio_rms:.4f}")
355
-
356
- # Check if audio might be silent or corrupted
357
- if audio_rms < 0.001:
358
- print("TTS: Warning - Audio appears to be very quiet or silent")
359
- if audio_max > 1.0 or audio_min < -1.0:
360
- print("TTS: Warning - Audio values outside expected range [-1, 1]")
361
- # Clip to valid range
362
- audio_array = np.clip(audio_array, -1.0, 1.0)
363
- print("TTS: Clipped audio to valid range")
364
-
365
- # Generate WAV at original sample rate first
366
- wav_bytes_original = self._convert_to_wav_bytes(audio_array, sample_rate)
367
- print(f"TTS: Converted to WAV: {len(wav_bytes_original)} bytes")
368
-
369
- # Generate WebM from original WAV
370
- webm_bytes = await self._convert_to_webm(wav_bytes_original)
371
- print(f"TTS: Converted to WebM: {len(webm_bytes)} bytes")
372
-
373
- # Generate 16kHz WAV for Android
374
- wav_bytes_16k = await self._resample_wav_to_16khz(wav_bytes_original, sample_rate)
375
- print(f"TTS: Resampled to 16kHz WAV: {len(wav_bytes_16k)} bytes")
376
-
377
- print(f"TTS: Generated dual format audio for '{text}'")
378
- print(f"=== TTS DUAL FORMAT GENERATION COMPLETE ===")
379
-
380
- return webm_bytes, wav_bytes_16k
381
-
382
- except Exception as e:
383
- print(f"TTS: Dual format TTS generation error: {e}")
384
- import traceback
385
- traceback.print_exc()
386
- return None, None
387
-
388
- def _convert_to_wav_bytes(self, audio_array: np.ndarray, sample_rate: int) -> bytes:
389
- """Convert numpy audio array to WAV bytes"""
390
- buffer = io.BytesIO()
391
- with wave.open(buffer, 'wb') as wav_file:
392
- wav_file.setnchannels(1) # Mono
393
- wav_file.setsampwidth(2) # 16-bit
394
- wav_file.setframerate(sample_rate)
395
-
396
- # Ensure audio is in valid range [-1, 1]
397
- audio_array = np.clip(audio_array, -1.0, 1.0)
398
-
399
- # Convert to int16 with proper scaling
400
- int16_audio = (audio_array * 32767).astype(np.int16)
401
-
402
- # Validate the converted audio
403
- print(f"TTS: Converting {len(audio_array)} samples to WAV at {sample_rate}Hz")
404
- print(f"TTS: Int16 audio range: {np.min(int16_audio)} to {np.max(int16_audio)}")
405
-
406
- wav_file.writeframes(int16_audio.tobytes())
407
-
408
- wav_data = buffer.getvalue()
409
- print(f"TTS: WAV file created: {len(wav_data)} bytes (expected header: 44 bytes + {len(int16_audio) * 2} data bytes)")
410
-
411
- return wav_data
412
-
413
- async def _resample_wav_to_16khz(self, wav_bytes: bytes, original_sample_rate: int) -> bytes:
414
- """Resample WAV audio to 16kHz using FFmpeg"""
415
- try:
416
- process = subprocess.Popen([
417
- "ffmpeg", "-f", "wav", "-i", "pipe:0",
418
- "-ar", "16000", # Set output sample rate to 16kHz
419
- "-ac", "1", # Ensure mono output
420
- "-f", "wav", "pipe:1"
421
- ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
422
-
423
- resampled_data, stderr = process.communicate(input=wav_bytes)
424
-
425
- if process.returncode != 0:
426
- print(f"TTS: FFmpeg resampling error: {stderr.decode()}")
427
- return wav_bytes # Return original if resampling fails
428
-
429
- return resampled_data
430
-
431
- except Exception as e:
432
- print(f"TTS: Resampling error: {e}")
433
- return wav_bytes # Return original if resampling fails
434
-
435
- async def _convert_to_webm(self, wav_bytes: bytes) -> bytes:
436
- """Convert WAV bytes to WebM format using FFmpeg"""
437
- try:
438
- process = subprocess.Popen([
439
- "ffmpeg", "-f", "wav", "-i", "pipe:0",
440
- "-c:a", "libopus", "-b:a", "64k",
441
- "-f", "webm", "pipe:1"
442
- ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
443
-
444
- webm_data, stderr = process.communicate(input=wav_bytes)
445
-
446
- if process.returncode != 0:
447
- print(f"TTS: FFmpeg error: {stderr.decode()}")
448
- return wav_bytes # Return original WAV if conversion fails
449
-
450
- return webm_data
451
-
452
- except Exception as e:
453
- print(f"TTS: WebM conversion error: {e}")
454
- return wav_bytes # Return original WAV if conversion fails
455
-
456
- async def load_remaining_models_in_background(self):
457
- """Load all remaining TTS models in the background after startup"""
458
- try:
459
- print("TTS: Starting background loading of additional voice models...")
460
-
461
- # Load primary models first
462
- for lang_code in self.tts_config.keys():
463
- if lang_code not in self.preload_languages and lang_code not in self.tts_pipelines:
464
- if self.espeak_available:
465
- try:
466
- print(f"TTS: Background loading primary model for {lang_code}...")
467
- self.models_loading_status[lang_code] = "loading"
468
-
469
- model_config = self.tts_config[lang_code]
470
- pipeline_obj = pipeline(
471
- "text-to-speech",
472
- model=model_config["model_repo"],
473
- device=self.device
474
- )
475
- self.tts_pipelines[lang_code] = pipeline_obj
476
- self.models_loading_status[lang_code] = "loaded"
477
- print(f"TTS: Successfully loaded primary model for {lang_code} in background")
478
-
479
- # Add a small delay between loading models
480
- await asyncio.sleep(2)
481
- except Exception as e:
482
- print(f"TTS: Failed to load primary model for {lang_code} in background: {e}")
483
- self.models_loading_status[lang_code] = "failed"
484
-
485
- # Load fallback models for languages not yet loaded
486
- for lang_code in self.fallback_tts_config.keys():
487
- if lang_code not in self.tts_pipelines:
488
- try:
489
- print(f"TTS: Background loading fallback model for {lang_code}...")
490
- model_config = self.fallback_tts_config[lang_code]
491
-
492
- if model_config["model_type"] == "speecht5":
493
- from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor
494
- processor = SpeechT5Processor.from_pretrained(model_config["model_repo"])
495
- model = SpeechT5ForTextToSpeech.from_pretrained(model_config["model_repo"])
496
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
497
- if self.device >= 0:
498
- model = model.to(f"cuda:{self.device}")
499
- vocoder = vocoder.to(f"cuda:{self.device}")
500
- self.tts_pipelines[lang_code] = {
501
- "type": "speecht5",
502
- "processor": processor,
503
- "model": model,
504
- "vocoder": vocoder
505
- }
506
- else:
507
- pipeline_obj = pipeline(
508
- "text-to-speech",
509
- model=model_config["model_repo"],
510
- device=self.device
511
- )
512
- self.tts_pipelines[lang_code] = pipeline_obj
513
-
514
- print(f"TTS: Successfully loaded fallback model for {lang_code} in background")
515
- await asyncio.sleep(2)
516
- except Exception as e:
517
- print(f"TTS: Failed to load fallback model for {lang_code}: {e}")
518
-
519
- print("TTS: Background loading of all voice models complete")
520
- print(f"TTS: Loaded models: {list(self.tts_pipelines.keys())}")
521
- except Exception as e:
522
- print(f"TTS: Error in background model loading: {e}")
523
-
524
- def start_background_loading(self):
525
- """Start background loading of models as a non-blocking task"""
526
- if self.background_loading_task is None:
527
- self.background_loading_task = asyncio.create_task(self.load_remaining_models_in_background())
528
- print("TTS: Background model loading task started")
529
-
530
- async def cleanup(self):
531
- """Cleanup resources"""
532
- # Cancel background loading if still running
533
- if self.background_loading_task and not self.background_loading_task.done():
534
- self.background_loading_task.cancel()
535
- try:
536
- await self.background_loading_task
537
- except asyncio.CancelledError:
538
- pass
539
-
540
- self.tts_pipelines.clear()
541
- print("TTS: TTS service cleaned up")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/tts_service_onnx.py DELETED
@@ -1,587 +0,0 @@
1
- import asyncio
2
- import io
3
- import wave
4
- import numpy as np
5
- import subprocess
6
- from typing import Dict, Optional
7
- import onnxruntime as ort
8
- from transformers import AutoProcessor
9
- from collections import OrderedDict
10
- import os
11
-
12
- class ONNXTTSService:
13
- def __init__(self):
14
- self.tts_models: Dict[str, any] = {}
15
- self.processors: Dict[str, any] = {}
16
- self.max_tts_models = 3 # Keep up to 3 TTS models in memory
17
- self.model_cache = OrderedDict() # LRU cache
18
-
19
- # GPU optimization - detect and configure providers
20
- available_providers = ort.get_available_providers()
21
- print(f"ONNX TTS: Available providers: {available_providers}")
22
-
23
- if 'CUDAExecutionProvider' in available_providers:
24
- # Configure CUDA provider with optimizations
25
- cuda_provider_options = {
26
- 'device_id': 0,
27
- 'arena_extend_strategy': 'kNextPowerOfTwo',
28
- 'gpu_mem_limit': int(0.7 * 1024 * 1024 * 1024), # 70% of GPU memory (TTS uses less than ASR)
29
- 'cudnn_conv_algo_search': 'EXHAUSTIVE',
30
- 'do_copy_in_default_stream': True,
31
- }
32
- self.providers = [('CUDAExecutionProvider', cuda_provider_options), 'CPUExecutionProvider']
33
- print(f"ONNX TTS: Using CUDA acceleration with GPU memory limit: {cuda_provider_options['gpu_mem_limit'] // (1024**3)}GB")
34
- else:
35
- self.providers = ['CPUExecutionProvider']
36
- print("ONNX TTS: CUDA not available, using CPU execution")
37
-
38
- print(f"ONNX TTS: Configured providers: {[p[0] if isinstance(p, tuple) else p for p in self.providers]}")
39
-
40
- # Check if espeak is available
41
- self.espeak_available = self._check_espeak_availability()
42
-
43
- # ONNX TTS model configurations - using FP32 optimized models (16kHz corrected)
44
- self.tts_config = {
45
- "kik": {"model_repo": "mutisya/vits-tts-onnx-fp32-kikuyu-v25-37-1", "model_type": "vits", "use_onnx": True},
46
- "luo": {"model_repo": "mutisya/vits-tts-onnx-fp32-luo-v25-37-1", "model_type": "vits", "use_onnx": True},
47
- "kam": {"model_repo": "mutisya/vits-tts-onnx-fp32-kamba-v25-37-1", "model_type": "vits", "use_onnx": True},
48
- "mer": {"model_repo": "mutisya/vits-tts-onnx-fp32-kimeru-v25-37-1", "model_type": "vits", "use_onnx": True},
49
- "som": {"model_repo": "mutisya/vits-tts-onnx-fp32-somali-v25-37-1", "model_type": "vits", "use_onnx": True},
50
- "swa": {"model_repo": "mutisya/vits-tts-onnx-fp32-swahili-v25-37-1", "model_type": "vits", "use_onnx": True},
51
- "eng": {"model_repo": "kakao-enterprise/vits-ljs", "model_type": "vits", "use_onnx": False}, # Fallback to PyTorch
52
- }
53
-
54
- # Alternative TTS models that don't require espeak (fallback)
55
- self.fallback_tts_config = {
56
- "eng": {"model_repo": "microsoft/speecht5_tts", "model_type": "speecht5"},
57
- "swa": {"model_repo": "facebook/mms-tts-swh", "model_type": "mms"},
58
- "som": {"model_repo": "facebook/mms-tts-som", "model_type": "mms"},
59
- }
60
-
61
- self.preload_languages = ["kik", "swa"]
62
-
63
- def _check_espeak_availability(self) -> bool:
64
- """Check if espeak is available on the system"""
65
- try:
66
- result = subprocess.run(['espeak', '--version'],
67
- capture_output=True, text=True, timeout=5)
68
- if result.returncode == 0:
69
- print("ONNX TTS: espeak is available")
70
- return True
71
- else:
72
- print("ONNX TTS: espeak command failed")
73
- return False
74
- except (subprocess.TimeoutExpired, FileNotFoundError, Exception) as e:
75
- print(f"ONNX TTS: espeak not available: {e}")
76
- return False
77
-
78
- async def initialize(self):
79
- """Initialize TTS models for preloaded languages"""
80
- print("ONNX TTS: Initializing TTS service with ONNX Runtime...")
81
- print(f"ONNX TTS: espeak available: {self.espeak_available}")
82
- print(f"ONNX TTS: Using providers: {self.providers}")
83
-
84
- for lang_code in self.preload_languages:
85
- await self.ensure_model_loaded(lang_code)
86
-
87
- async def ensure_model_loaded(self, language_code: str):
88
- """Load TTS model for language if not already loaded with LRU cache"""
89
- if language_code in self.model_cache:
90
- # Move to end (most recently used)
91
- self.model_cache.move_to_end(language_code)
92
- return
93
-
94
- # Check if we need to evict old models
95
- while len(self.model_cache) >= self.max_tts_models:
96
- # Remove least recently used model
97
- old_lang, _ = self.model_cache.popitem(last=False)
98
- if old_lang in self.tts_models:
99
- del self.tts_models[old_lang]
100
- if old_lang in self.processors:
101
- del self.processors[old_lang]
102
- print(f"ONNX TTS: Evicted model for {old_lang} (LRU cache)")
103
-
104
- # First try to load ONNX model
105
- if language_code in self.tts_config:
106
- model_config = self.tts_config[language_code]
107
-
108
- if model_config.get("use_onnx", False):
109
- try:
110
- print(f"ONNX TTS: Loading ONNX model for {language_code}")
111
-
112
- # Create ONNX session with optimizations and verbose logging
113
- session_options = ort.SessionOptions()
114
- session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
115
-
116
- # Enable verbose logging to diagnose operator assignments
117
- session_options.log_severity_level = 1 # WARNING level for detailed logs
118
- session_options.logid = "ONNX_TTS" # Prefix for log identification
119
-
120
- # GPU memory optimization for T4 with diagnostic tracing
121
- if 'CUDAExecutionProvider' in self.providers:
122
- provider_options = [{
123
- 'device_id': 0,
124
- 'arena_extend_strategy': 'kSameAsRequested',
125
- 'gpu_mem_limit': int(0.3 * 1024 * 1024 * 1024), # 30% of GPU memory for TTS
126
- 'cudnn_conv_algo_search': 'EXHAUSTIVE',
127
- 'do_copy_in_default_stream': True,
128
- 'enable_tracing': True, # Enable tracing for better diagnostics
129
- }]
130
- providers = [('CUDAExecutionProvider', provider_options[0]), 'CPUExecutionProvider']
131
- else:
132
- providers = self.providers
133
-
134
- # Get authentication token for private repos
135
- import os
136
- auth_token = os.getenv('HUGGING_FACE_HUB_TOKEN') or os.getenv('HF_TOKEN')
137
-
138
- # Download ONNX model from HuggingFace Hub with authentication
139
- from huggingface_hub import hf_hub_download
140
- onnx_path = hf_hub_download(
141
- repo_id=model_config["model_repo"],
142
- filename="model.onnx",
143
- token=auth_token
144
- )
145
-
146
- session = ort.InferenceSession(onnx_path, providers=providers, sess_options=session_options)
147
-
148
- # Load processor for preprocessing with authentication
149
- processor = AutoProcessor.from_pretrained(
150
- model_config["model_repo"],
151
- token=auth_token
152
- )
153
-
154
- self.tts_models[language_code] = session
155
- self.processors[language_code] = processor
156
- self.model_cache[language_code] = True
157
-
158
- print(f"ONNX TTS: Successfully loaded ONNX model for {language_code}")
159
- return
160
-
161
- except Exception as e:
162
- print(f"ONNX TTS: Failed to load ONNX model for {language_code}: {e}")
163
- # Continue to try fallback models
164
- else:
165
- # Try PyTorch model if ONNX not available
166
- try:
167
- print(f"ONNX TTS: Loading PyTorch model for {language_code} (fallback)")
168
- from transformers import pipeline
169
-
170
- pipeline_obj = pipeline(
171
- "text-to-speech",
172
- model=model_config["model_repo"],
173
- device=0 if self.providers[0] == 'CUDAExecutionProvider' else -1
174
- )
175
- self.tts_models[language_code] = pipeline_obj
176
- self.processors[language_code] = None # Not needed for pipeline
177
- self.model_cache[language_code] = True
178
-
179
- print(f"ONNX TTS: Successfully loaded PyTorch model for {language_code}")
180
- return
181
-
182
- except Exception as e:
183
- print(f"ONNX TTS: Failed to load PyTorch model for {language_code}: {e}")
184
-
185
- # Try fallback models if primary failed
186
- if language_code in self.fallback_tts_config:
187
- try:
188
- model_config = self.fallback_tts_config[language_code]
189
-
190
- if model_config["model_type"] == "speecht5":
191
- # Special handling for SpeechT5
192
- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
193
- import torch
194
-
195
- # Get authentication token for private repos
196
- import os
197
- auth_token = os.getenv('HUGGING_FACE_HUB_TOKEN') or os.getenv('HF_TOKEN')
198
-
199
- processor = SpeechT5Processor.from_pretrained(
200
- model_config["model_repo"],
201
- token=auth_token
202
- )
203
- model = SpeechT5ForTextToSpeech.from_pretrained(
204
- model_config["model_repo"],
205
- token=auth_token
206
- )
207
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
208
-
209
- # Create a custom pipeline-like object
210
- class SpeechT5Pipeline:
211
- def __init__(self, processor, model, vocoder):
212
- self.processor = processor
213
- self.model = model
214
- self.vocoder = vocoder
215
-
216
- def __call__(self, text):
217
- inputs = self.processor(text=text, return_tensors="pt")
218
- # Use default speaker embeddings
219
- import datasets
220
- embeddings_dataset = datasets.load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
221
- speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
222
-
223
- speech = self.model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=self.vocoder)
224
-
225
- return {
226
- "audio": speech.numpy(),
227
- "sampling_rate": 16000
228
- }
229
-
230
- pipeline_obj = SpeechT5Pipeline(processor, model, vocoder)
231
- else:
232
- # Standard pipeline for MMS models
233
- from transformers import pipeline
234
- pipeline_obj = pipeline(
235
- "text-to-speech",
236
- model=model_config["model_repo"],
237
- device=0 if self.providers[0] == 'CUDAExecutionProvider' else -1
238
- )
239
-
240
- self.tts_models[language_code] = pipeline_obj
241
- self.processors[language_code] = None
242
- self.model_cache[language_code] = True
243
-
244
- print(f"ONNX TTS: Successfully loaded fallback model for {language_code}")
245
- return
246
-
247
- except Exception as e:
248
- print(f"ONNX TTS: Failed to load fallback TTS model for {language_code}: {e}")
249
-
250
- print(f"ONNX TTS: No TTS model available for language: {language_code}")
251
-
252
- async def generate_speech(self, text: str, language_code: str, output_format: str = "webm") -> Optional[bytes]:
253
- """Generate speech audio from text using ONNX models
254
-
255
- Args:
256
- text: Text to convert to speech
257
- language_code: Language code for TTS model
258
- output_format: Output format - "webm" (default, web-compatible) or "wav" (Android-compatible)
259
-
260
- Returns:
261
- Audio bytes in the requested format, or None if generation fails
262
- """
263
- try:
264
- print(f"=== ONNX TTS GENERATION REQUEST ===")
265
- print(f"Text: '{text}'")
266
- print(f"Language: {language_code}")
267
- print(f"Output format: {output_format}")
268
-
269
- # Input validation
270
- if not text or not text.strip():
271
- print("ONNX TTS: Empty or whitespace-only text, skipping TTS generation")
272
- return None
273
-
274
- # Check for very short text that might cause issues
275
- clean_text = text.strip()
276
- if len(clean_text) <= 2 and clean_text in [".", ",", "!", "?", ":", ";", "-"]:
277
- print(f"ONNX TTS: Text '{clean_text}' is too short or punctuation-only, skipping TTS generation")
278
- return None
279
-
280
- # Check for minimum meaningful length
281
- if len(clean_text.replace(" ", "").replace(".", "").replace(",", "")) < 2:
282
- print(f"ONNX TTS: Text '{clean_text}' has insufficient content for TTS, skipping")
283
- return None
284
-
285
- # Check if the language is supported
286
- if language_code not in self.tts_config and language_code not in self.fallback_tts_config:
287
- print(f"ONNX TTS: Language {language_code} not configured for TTS")
288
- return None
289
-
290
- await self.ensure_model_loaded(language_code)
291
-
292
- if language_code not in self.tts_models:
293
- print(f"ONNX TTS: TTS model not available for language: {language_code}")
294
- return None
295
-
296
- print(f"ONNX TTS: Generating speech for '{text}' in {language_code}")
297
-
298
- # Generate speech based on model type
299
- model_config = self.tts_config.get(language_code, {})
300
- if model_config.get("use_onnx", False):
301
- # ONNX inference
302
- audio_array, sample_rate = await self._run_onnx_tts_inference(text, language_code)
303
- else:
304
- # PyTorch pipeline inference
305
- pipeline_obj = self.tts_models[language_code]
306
- result = pipeline_obj(text)
307
-
308
- audio_array = result["audio"]
309
- sample_rate = result.get("sampling_rate", 16000) # Default to 16kHz (corrected)
310
-
311
- print(f"ONNX TTS: Generated audio array of length {len(audio_array)} at {sample_rate}Hz")
312
-
313
- # Validate audio array
314
- if len(audio_array) == 0:
315
- print("ONNX TTS: Warning - Generated audio array is empty")
316
- return None
317
-
318
- # Check audio statistics
319
- audio_min = np.min(audio_array)
320
- audio_max = np.max(audio_array)
321
- audio_rms = np.sqrt(np.mean(audio_array**2))
322
- print(f"ONNX TTS: Audio statistics - Min: {audio_min:.4f}, Max: {audio_max:.4f}, RMS: {audio_rms:.4f}")
323
-
324
- # Check if audio might be silent or corrupted
325
- if audio_rms < 0.001:
326
- print("ONNX TTS: Warning - Audio appears to be very quiet or silent")
327
- if audio_max > 1.0 or audio_min < -1.0:
328
- print("ONNX TTS: Warning - Audio values outside expected range [-1, 1]")
329
- # Clip to valid range
330
- audio_array = np.clip(audio_array, -1.0, 1.0)
331
- print("ONNX TTS: Clipped audio to valid range")
332
-
333
- # Convert to requested format
334
- if output_format == "wav":
335
- # For Android: use 16kHz sample rate
336
- target_sample_rate = 16000
337
- wav_bytes = self._convert_to_wav_bytes(audio_array, sample_rate)
338
- print(f"ONNX TTS: Converted to WAV: {len(wav_bytes)} bytes")
339
-
340
- # Convert sample rate to 16kHz if needed for Android compatibility
341
- if sample_rate != target_sample_rate:
342
- print(f"ONNX TTS: Converting sample rate from {sample_rate}Hz to {target_sample_rate}Hz")
343
- wav_bytes = await self._resample_wav_to_16khz(wav_bytes, sample_rate)
344
- print(f"ONNX TTS: Resampled WAV: {len(wav_bytes)} bytes")
345
-
346
- print(f"ONNX TTS: Generated {len(wav_bytes)} bytes of WAV audio for '{text}'")
347
- print(f"=== ONNX TTS GENERATION COMPLETE ===")
348
-
349
- return wav_bytes
350
- else:
351
- # For web: use original sample rate and convert to WebM
352
- wav_bytes = self._convert_to_wav_bytes(audio_array, sample_rate)
353
- print(f"ONNX TTS: Converted to WAV: {len(wav_bytes)} bytes")
354
-
355
- # Convert to WebM format for web compatibility
356
- webm_bytes = await self._convert_to_webm(wav_bytes)
357
-
358
- print(f"ONNX TTS: Generated {len(webm_bytes)} bytes of WebM audio for '{text}'")
359
- print(f"=== ONNX TTS GENERATION COMPLETE ===")
360
-
361
- return webm_bytes
362
-
363
- except Exception as e:
364
- print(f"ONNX TTS: TTS generation error: {e}")
365
- import traceback
366
- traceback.print_exc()
367
- return None
368
-
369
- async def _run_onnx_tts_inference(self, text: str, language_code: str) -> tuple[np.ndarray, int]:
370
- """Run ONNX inference for text-to-speech"""
371
- try:
372
- session = self.tts_models[language_code]
373
- processor = self.processors[language_code]
374
-
375
- # Preprocess text
376
- inputs = processor(text=text, return_tensors="np")
377
-
378
- # Get input names for ONNX session
379
- input_names = [inp.name for inp in session.get_inputs()]
380
-
381
- # Prepare inputs for ONNX
382
- onnx_inputs = {}
383
- for name in input_names:
384
- if name in inputs:
385
- onnx_inputs[name] = inputs[name]
386
- elif name == "input_ids" and "input_ids" in inputs:
387
- onnx_inputs[name] = inputs["input_ids"].astype(np.int64)
388
- elif name == "attention_mask" and "attention_mask" in inputs:
389
- onnx_inputs[name] = inputs["attention_mask"].astype(np.int64)
390
-
391
- # Run ONNX inference
392
- outputs = session.run(None, onnx_inputs)
393
-
394
- # Extract audio from outputs (assuming first output is audio)
395
- audio_array = outputs[0]
396
-
397
- # Ensure audio is 1D
398
- if audio_array.ndim > 1:
399
- audio_array = audio_array.flatten()
400
-
401
- # Convert to float32 if needed
402
- if audio_array.dtype != np.float32:
403
- audio_array = audio_array.astype(np.float32)
404
-
405
- # Sample rate is 16kHz for our corrected models
406
- sample_rate = 16000
407
-
408
- return audio_array, sample_rate
409
-
410
- except Exception as e:
411
- print(f"ONNX TTS: Inference error: {e}")
412
- import traceback
413
- traceback.print_exc()
414
- return np.array([], dtype=np.float32), 16000
415
-
416
- async def generate_speech_dual_format(self, text: str, language_code: str) -> tuple[Optional[bytes], Optional[bytes]]:
417
- """Generate speech audio in both WebM and WAV formats using ONNX
418
-
419
- Args:
420
- text: Text to convert to speech
421
- language_code: Language code for TTS model
422
-
423
- Returns:
424
- Tuple of (webm_bytes, wav_bytes), either can be None if generation fails
425
- """
426
- try:
427
- print(f"=== ONNX TTS DUAL FORMAT GENERATION REQUEST ===")
428
- print(f"Text: '{text}'")
429
- print(f"Language: {language_code}")
430
-
431
- # Input validation
432
- if not text or not text.strip():
433
- print("ONNX TTS: Empty or whitespace-only text, skipping TTS generation")
434
- return None, None
435
-
436
- clean_text = text.strip()
437
- if len(clean_text) <= 2 and clean_text in [".", ",", "!", "?", ":", ";", "-"]:
438
- print(f"ONNX TTS: Text '{clean_text}' is too short or punctuation-only, skipping TTS generation")
439
- return None, None
440
-
441
- if len(clean_text.replace(" ", "").replace(".", "").replace(",", "")) < 2:
442
- print(f"ONNX TTS: Text '{clean_text}' has insufficient content for TTS, skipping")
443
- return None, None
444
-
445
- # Check if the language is supported
446
- if language_code not in self.tts_config and language_code not in self.fallback_tts_config:
447
- print(f"ONNX TTS: Language {language_code} not configured for TTS")
448
- return None, None
449
-
450
- await self.ensure_model_loaded(language_code)
451
-
452
- if language_code not in self.tts_models:
453
- print(f"ONNX TTS: TTS model not available for language: {language_code}")
454
- return None, None
455
-
456
- print(f"ONNX TTS: Generating speech for '{text}' in {language_code}")
457
-
458
- # Generate speech once
459
- model_config = self.tts_config.get(language_code, {})
460
- if model_config.get("use_onnx", False):
461
- # ONNX inference
462
- audio_array, sample_rate = await self._run_onnx_tts_inference(text, language_code)
463
- else:
464
- # PyTorch pipeline inference
465
- pipeline_obj = self.tts_models[language_code]
466
- result = pipeline_obj(text)
467
-
468
- audio_array = result["audio"]
469
- sample_rate = result.get("sampling_rate", 16000)
470
-
471
- print(f"ONNX TTS: Generated audio array of length {len(audio_array)} at {sample_rate}Hz")
472
-
473
- # Validate audio array
474
- if len(audio_array) == 0:
475
- print("ONNX TTS: Warning - Generated audio array is empty")
476
- return None, None
477
-
478
- # Check for potential issues with audio data
479
- audio_min = np.min(audio_array)
480
- audio_max = np.max(audio_array)
481
- audio_rms = np.sqrt(np.mean(audio_array**2))
482
- print(f"ONNX TTS: Audio statistics - Min: {audio_min:.4f}, Max: {audio_max:.4f}, RMS: {audio_rms:.4f}")
483
-
484
- if audio_rms < 0.001:
485
- print("ONNX TTS: Warning - Audio appears to be very quiet or silent")
486
- if audio_max > 1.0 or audio_min < -1.0:
487
- print("ONNX TTS: Warning - Audio values outside expected range [-1, 1]")
488
- audio_array = np.clip(audio_array, -1.0, 1.0)
489
- print("ONNX TTS: Clipped audio to valid range")
490
-
491
- # Generate WAV at original sample rate first
492
- wav_bytes_original = self._convert_to_wav_bytes(audio_array, sample_rate)
493
- print(f"ONNX TTS: Converted to WAV: {len(wav_bytes_original)} bytes")
494
-
495
- # Generate WebM from original WAV
496
- webm_bytes = await self._convert_to_webm(wav_bytes_original)
497
- print(f"ONNX TTS: Converted to WebM: {len(webm_bytes)} bytes")
498
-
499
- # Generate 16kHz WAV for Android
500
- wav_bytes_16k = await self._resample_wav_to_16khz(wav_bytes_original, sample_rate)
501
- print(f"ONNX TTS: Resampled to 16kHz WAV: {len(wav_bytes_16k)} bytes")
502
-
503
- print(f"ONNX TTS: Generated dual format audio for '{text}'")
504
- print(f"=== ONNX TTS DUAL FORMAT GENERATION COMPLETE ===")
505
-
506
- return webm_bytes, wav_bytes_16k
507
-
508
- except Exception as e:
509
- print(f"ONNX TTS: Dual format TTS generation error: {e}")
510
- import traceback
511
- traceback.print_exc()
512
- return None, None
513
-
514
- def _convert_to_wav_bytes(self, audio_array: np.ndarray, sample_rate: int) -> bytes:
515
- """Convert numpy audio array to WAV bytes"""
516
- buffer = io.BytesIO()
517
- with wave.open(buffer, 'wb') as wav_file:
518
- wav_file.setnchannels(1) # Mono
519
- wav_file.setsampwidth(2) # 16-bit
520
- wav_file.setframerate(sample_rate)
521
-
522
- # Ensure audio is in valid range [-1, 1]
523
- audio_array = np.clip(audio_array, -1.0, 1.0)
524
-
525
- # Convert to int16 with proper scaling
526
- int16_audio = (audio_array * 32767).astype(np.int16)
527
-
528
- # Validate the converted audio
529
- print(f"ONNX TTS: Converting {len(audio_array)} samples to WAV at {sample_rate}Hz")
530
- print(f"ONNX TTS: Int16 audio range: {np.min(int16_audio)} to {np.max(int16_audio)}")
531
-
532
- wav_file.writeframes(int16_audio.tobytes())
533
-
534
- wav_data = buffer.getvalue()
535
- print(f"ONNX TTS: WAV file created: {len(wav_data)} bytes")
536
-
537
- return wav_data
538
-
539
- async def _resample_wav_to_16khz(self, wav_bytes: bytes, original_sample_rate: int) -> bytes:
540
- """Resample WAV audio to 16kHz using FFmpeg"""
541
- try:
542
- process = subprocess.Popen([
543
- "ffmpeg", "-f", "wav", "-i", "pipe:0",
544
- "-ar", "16000", # Set output sample rate to 16kHz
545
- "-ac", "1", # Ensure mono output
546
- "-f", "wav", "pipe:1"
547
- ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
548
-
549
- resampled_data, stderr = process.communicate(input=wav_bytes)
550
-
551
- if process.returncode != 0:
552
- print(f"ONNX TTS: FFmpeg resampling error: {stderr.decode()}")
553
- return wav_bytes # Return original if resampling fails
554
-
555
- return resampled_data
556
-
557
- except Exception as e:
558
- print(f"ONNX TTS: Resampling error: {e}")
559
- return wav_bytes # Return original if resampling fails
560
-
561
- async def _convert_to_webm(self, wav_bytes: bytes) -> bytes:
562
- """Convert WAV bytes to WebM format using FFmpeg"""
563
- try:
564
- process = subprocess.Popen([
565
- "ffmpeg", "-f", "wav", "-i", "pipe:0",
566
- "-c:a", "libopus", "-b:a", "64k",
567
- "-f", "webm", "pipe:1"
568
- ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
569
-
570
- webm_data, stderr = process.communicate(input=wav_bytes)
571
-
572
- if process.returncode != 0:
573
- print(f"ONNX TTS: FFmpeg error: {stderr.decode()}")
574
- return wav_bytes # Return original WAV if conversion fails
575
-
576
- return webm_data
577
-
578
- except Exception as e:
579
- print(f"ONNX TTS: WebM conversion error: {e}")
580
- return wav_bytes # Return original WAV if conversion fails
581
-
582
- async def cleanup(self):
583
- """Cleanup resources"""
584
- self.tts_models.clear()
585
- self.processors.clear()
586
- self.model_cache.clear()
587
- print("ONNX TTS: TTS service cleaned up")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/services/websocket_manager.py DELETED
@@ -1,909 +0,0 @@
1
- import asyncio
2
- import uuid
3
- from typing import Dict, Set, Optional
4
- import socketio
5
- import numpy as np
6
-
7
- from app.models import Message, LanguageCode
8
- from app.services.session_manager import SessionManager, LANGUAGE_MAP
9
- from app.services.transcription_service import TranscriptionService
10
- from app.services.translation_service import TranslationService
11
- from app.services.tts_service import TTSService
12
-
13
def truncate_array_for_log(arr, max_items=10):
    """Return *arr* shortened for readable log output.

    Falsy values (``None``, ``[]``) and sequences with at most ``max_items``
    elements are returned unchanged.  Longer sequences become a list of the
    first ``max_items`` elements followed by a summary string noting how many
    items were omitted.

    Fix: the slice is wrapped in ``list()`` so non-list sequences (e.g.
    tuples) no longer raise ``TypeError`` on the ``+ [summary]`` concatenation.
    Behavior for lists is unchanged.
    """
    if not arr or len(arr) <= max_items:
        return arr
    return list(arr[:max_items]) + [f"... {len(arr) - max_items} more items"]
18
-
19
- class WebSocketManager:
20
def __init__(self, session_manager: SessionManager, transcription_service: TranscriptionService,
             translation_service: TranslationService, tts_service: TTSService):
    """Store the collaborating services and initialise empty connection/message state."""
    # Injected collaborators.
    self.session_manager = session_manager
    self.transcription_service = transcription_service
    self.translation_service = translation_service
    self.tts_service = tts_service
    # Socket.IO server; attached later by main.py via set_socketio().
    self.sio = None

    # Connection bookkeeping:
    #   client_sessions:     sid -> session_id
    #   client_participants: sid -> participant_id
    #   session_clients:     session_id -> set of connected sids
    self.client_sessions = {}
    self.client_participants = {}
    self.session_clients = {}

    # Message bookkeeping:
    #   messages:                     message_id -> Message
    #   participant_current_message:  participant_id -> id of in-progress message
    #   processed_messages:           finalized ids (duplicate-completion guard)
    self.messages = {}
    self.participant_current_message = {}
    self.processed_messages = set()
35
-
36
def set_socketio(self, sio):
    """Attach the Socket.IO server used for all outbound emits (called from main.py)."""
    self.sio = sio
39
-
40
async def handle_join_session(self, sid: str, data: dict):
    """Handle a participant joining a session.

    Expects ``data`` with 'sessionId' (full UUID or short code),
    'participantName' and 'language'.  On success emits
    'participant_joined' back to the joining client and
    'participant_update' to everyone else in the session; any failure
    is reported to the caller via a 'join_error' emit.
    """
    try:
        session_id = data.get('sessionId')
        participant_name = data.get('participantName')
        language_code = data.get('language')

        print(f"=== JOIN SESSION REQUEST ===")
        print(f"Session ID: {session_id}")
        print(f"Participant: {participant_name}")
        print(f"Language: {language_code}")

        # All three fields are mandatory; reject early if any is missing/empty.
        if not all([session_id, participant_name, language_code]):
            await self._emit_error(sid, "Missing required fields")
            return

        # Validate language code
        try:
            lang_enum = LanguageCode(language_code)
            print(f"Language code validated: {lang_enum}")
        except ValueError:
            await self._emit_error(sid, f"Invalid language code: {language_code}")
            return

        # Resolve session ID (in case it's a short code)
        session = await self.session_manager.get_session(session_id)
        if not session:
            await self._emit_error(sid, "Session not found")
            return

        # Use the full UUID for all subsequent operations
        session_id = session.id
        print(f"Resolved session ID: {session_id}")

        # Add participant to session
        participant = await self.session_manager.add_participant(
            session_id, participant_name, lang_enum
        )

        print(f"Participant created: {participant}")

        if not participant:
            await self._emit_error(sid, "Session not found or unable to join")
            return

        # Get updated session info (re-fetched so the logged language/participant
        # lists include the participant we just added).
        session = await self.session_manager.get_session(session_id)
        if session:
            print(f"Session {session_id} now has {len(session.languages)} languages: {[f'{lang.name}({lang.code.value})' for lang in session.languages]}")
            print(f"Session participants: {[f'{p.name}({p.language.name})' for p in session.participants]}")

        # Track client connections (sid -> session/participant maps used by
        # the audio, status and cleanup handlers).
        self.client_sessions[sid] = session_id
        self.client_participants[sid] = participant.id

        if session_id not in self.session_clients:
            self.session_clients[session_id] = set()
        self.session_clients[session_id].add(sid)

        # Send success response
        await self.sio.emit('participant_joined', participant.dict(), room=sid)

        # Notify other participants
        await self._broadcast_to_session(session_id, 'participant_update', participant.dict(), exclude_sid=sid)

        print(f"=== JOIN SESSION COMPLETE ===")

    except Exception as e:
        # Broad catch: any unexpected failure is logged and surfaced to the
        # client as a generic join error rather than dropping the request.
        print(f"Error in handle_join_session: {e}")
        import traceback
        traceback.print_exc()
        await self._emit_error(sid, "Failed to join session")
112
-
113
async def handle_join_hub(self, sid: str, data: dict):
    """Register a hub (observer) client on an existing session.

    A hub joins the broadcast set for the session without becoming a
    participant; success is acknowledged with a 'hub_joined' emit.
    """
    try:
        session_id = data.get('sessionId')
        if not session_id:
            await self._emit_error(sid, "Missing sessionId for hub")
            return

        # Verify the session exists before tracking the hub.
        if not await self.session_manager.get_session(session_id):
            await self._emit_error(sid, "Session not found")
            return

        # Track the hub connection alongside regular clients.
        self.client_sessions[sid] = session_id
        self.session_clients.setdefault(session_id, set()).add(sid)

        # Acknowledge to the hub client.
        await self.sio.emit('hub_joined', {'sessionId': session_id}, room=sid)

        print(f"Hub joined session {session_id} with sid {sid}")

    except Exception as e:
        print(f"Error in handle_join_hub: {e}")
        await self._emit_error(sid, "Failed to join as hub")
142
-
143
async def handle_audio_chunk(self, sid: str, data: dict):
    """Forward a participant's raw audio chunk into VAD-based processing.

    ``data['audioData']`` is a list of byte values; ``isPauseBoundary``
    marks an explicit pause (e.g. stop button) that should force sentence
    finalization.
    """
    try:
        participant_id = self.client_participants.get(sid)
        if not participant_id:
            return  # unknown/unjoined client

        raw_samples = data.get('audioData', [])
        pause_boundary = data.get('isPauseBoundary', False)
        if not raw_samples:
            return

        chunk = bytes(raw_samples)
        if not chunk:
            return

        # Voice-activity detection drives sentence segmentation; a pause
        # boundary is treated as silence so the pending sentence finalizes.
        voiced = self.transcription_service.has_voice_activity(chunk)
        await self._process_audio_chunk_vad(
            participant_id, chunk, voiced and not pause_boundary, pause_boundary
        )

    except Exception as e:
        print(f"Error in handle_audio_chunk: {e}")
        import traceback
        traceback.print_exc()
172
-
173
async def handle_speaking_status(self, sid: str, data: dict):
    """Handle speaking status updates.

    Persists the flag via the session manager, and when a participant
    stops speaking, force-completes any partially transcribed sentence
    so it is broadcast and translated immediately.  Finally relays the
    status to the whole session as a 'speaking_status' event.
    """
    try:
        participant_id = self.client_participants.get(sid)
        if not participant_id:
            return

        is_speaking = data.get('isSpeaking', False)
        await self.session_manager.update_participant_speaking_status(participant_id, is_speaking)

        # If participant stopped speaking, force complete any pending sentence
        if not is_speaking:
            # Get session and participant info for force completion
            session_id = await self.session_manager.get_participant_session_id(participant_id)
            if session_id:
                session = await self.session_manager.get_session(session_id)
                # NOTE(review): `session` is not checked for None before
                # `.participants` — a missing session would raise here and be
                # swallowed by the outer except; confirm get_session cannot
                # return None once get_participant_session_id succeeded.
                participant = next((p for p in session.participants if p.id == participant_id), None)

                if participant:
                    # Define the sentence callback for force completion.
                    # Invoked by the transcription service with the final text
                    # and accumulated audio if a sentence was still pending.
                    async def force_sentence_callback(final_text: str, final_audio: bytes):
                        # Create or get existing message
                        current_message_id = self.participant_current_message.get(participant_id)
                        if not current_message_id:
                            current_message_id = str(uuid.uuid4())

                        # Check if this message was already processed
                        if current_message_id in self.processed_messages:
                            print(f"Force completion: Message {current_message_id} already processed, skipping duplicate")
                            return

                        # Mark as processed to prevent duplicates
                        self.processed_messages.add(current_message_id)

                        from app.models import Message
                        message = Message(
                            id=current_message_id,
                            session_id=session_id,
                            speaker_id=participant_id,
                            speaker_name=participant.name,
                            original_text=final_text,
                            original_language=participant.language,
                            translations={},
                            is_transcribing=False
                        )
                        self.messages[current_message_id] = message

                        # Broadcast the completed message
                        print(f"Force completion: Broadcasting message_complete for {current_message_id}: '{final_text}'")
                        await self._broadcast_to_session(session_id, 'message_complete', {
                            'messageId': current_message_id,
                            'sessionId': session_id,
                            'text': final_text,
                            'speakerId': participant_id,
                            'speakerName': participant.name,
                            'language': participant.language.code.value
                        })

                        # Clear current message tracking
                        if participant_id in self.participant_current_message:
                            del self.participant_current_message[participant_id]

                        # Start translation processing (non-blocking to allow continued audio processing)
                        print("Starting TRANSLATION and TTS (background task)")
                        asyncio.create_task(self._process_translations_and_tts(message, session))

                    # Force complete any pending sentence
                    await self.transcription_service.force_complete_sentence(
                        participant_id,
                        participant.language.code.value,
                        force_sentence_callback
                    )

                    # Clear transcription service buffers after force completion
                    self.transcription_service.clear_participant_buffers(participant_id)

        # Broadcast speaking status to session
        session_id = self.client_sessions.get(sid)
        if session_id:
            await self._broadcast_to_session(session_id, 'speaking_status', {
                'participantId': participant_id,
                'isSpeaking': is_speaking
            })

    except Exception as e:
        print(f"Error in handle_speaking_status: {e}")
        import traceback
        traceback.print_exc()
261
-
262
async def handle_leave_session(self, sid: str, data: dict):
    """Tear down all state for a participant that explicitly left (``data`` is unused)."""
    await self._cleanup_client(sid)
265
-
266
async def handle_disconnect(self, sid: str):
    """Tear down all state for a client whose socket dropped."""
    await self._cleanup_client(sid)
269
async def _process_audio_chunk_vad(self, participant_id: str, audio_data: bytes, has_voice_activity: bool, is_pause_boundary: bool = False):
    """Process audio chunk using VAD-based sentence detection.

    Creates a placeholder Message (and a 'typing_start' broadcast) on the
    first chunk of a new utterance, then feeds the chunk to the
    transcription service with three callbacks: streaming progress,
    ASR debug data, and sentence completion (which broadcasts
    'message_complete' and kicks off translation/TTS as a background task).

    Args:
        participant_id: ID of the participant
        audio_data: Raw audio data bytes
        has_voice_activity: Whether voice activity was detected in this chunk
        is_pause_boundary: If True, forces sentence finalization (from stop button or explicit pause)
    """
    try:
        session_id = await self.session_manager.get_participant_session_id(participant_id)
        if not session_id:
            return

        session = await self.session_manager.get_session(session_id)
        if not session:
            return

        participant = next((p for p in session.participants if p.id == participant_id), None)
        if not participant:
            return

        # Get or create current message for this participant
        current_message_id = self.participant_current_message.get(participant_id)
        if not current_message_id:
            current_message_id = str(uuid.uuid4())
            message = Message(
                id=current_message_id,
                session_id=session_id,
                speaker_id=participant_id,
                speaker_name=participant.name,
                original_text="",
                original_language=participant.language,
                translations={},
                is_transcribing=True
            )
            self.messages[current_message_id] = message
            self.participant_current_message[participant_id] = current_message_id

            # Start typing indicator
            await self._broadcast_to_session(session_id, 'typing_start', {
                'speakerId': participant_id,
                'speakerName': participant.name,
                'languageCode': participant.language.code.value
            })

        message = self.messages[current_message_id]

        # Define callbacks.  Each closes over current_message_id/session_id,
        # so they stay bound to THIS utterance even if a new one starts.
        async def on_progress(text: str, is_complete: bool):
            """Called with in-progress transcription updates"""
            # Update the message text even for progress updates
            message.original_text = text

            await self._broadcast_to_session(session_id, 'transcription_progress', {
                'messageId': current_message_id,
                'text': text,
                'isTranscribing': not is_complete,
                'speakerId': participant_id,
                'speakerName': participant.name
            })

        async def on_debug(debug_info: dict):
            """Called with debug information from ASR (wav2vec2 models only)"""
            # Prepare debug data for transmission
            debug_data = {
                'messageId': current_message_id,
                'text': debug_info['text'],
                'timestamps': debug_info['timestamps'],
                'audioData': list(debug_info['audio_data']),
                'audioDuration': debug_info['audio_duration'],
                'modelType': debug_info['model_type'],
                'language': participant.language.code.value
            }

            await self._broadcast_to_session(session_id, 'transcription_debug', debug_data)

        async def on_sentence_complete(final_text: str, final_audio: bytes):
            """Called when a complete sentence is detected"""

            # Check if this message was already processed
            if current_message_id in self.processed_messages:
                print(f"Message {current_message_id} already processed, skipping duplicate")
                return

            # Mark as processed to prevent duplicates
            self.processed_messages.add(current_message_id)

            message.original_text = final_text
            message.is_transcribing = False

            # Broadcast complete sentence with session ID
            message_data = {
                'messageId': current_message_id,
                'sessionId': session_id,
                'text': final_text,
                'speakerId': participant_id,
                'speakerName': participant.name,
                'language': participant.language.code.value,
                'audioData': list(final_audio)
            }

            print(f"Broadcasting message_complete for {current_message_id}: '{final_text}'")
            await self._broadcast_to_session(session_id, 'message_complete', message_data)

            # Stop typing indicator
            await self._broadcast_to_session(session_id, 'typing_stop', {
                'speakerId': participant_id
            })

            # Clear current message tracking
            if participant_id in self.participant_current_message:
                del self.participant_current_message[participant_id]

            # Start translation and TTS processing (non-blocking to allow continued audio processing)
            print("Starting TRANSLATION and TTS (background task)")
            asyncio.create_task(self._process_translations_and_tts(message, session))

        # Process the audio chunk
        result_text = await self.transcription_service.process_audio_chunk(
            audio_data,
            participant.language.code.value,
            participant_id,
            has_voice_activity,
            progress_callback=on_progress,
            sentence_callback=on_sentence_complete,
            debug_callback=on_debug
        )

        # If this is a pause boundary (stop button clicked), force immediate finalization
        if is_pause_boundary and participant_id in self.participant_current_message:
            print(f"Pause boundary detected - forcing sentence finalization for participant {participant_id}")
            # Get the current accumulated text from transcription service.
            # NOTE(review): this reaches into the transcription service's
            # internal buffers (candidate_text_cache etc.); the hasattr guards
            # make it best-effort — confirm these attribute names against
            # TranscriptionService.
            if hasattr(self.transcription_service, 'candidate_text_cache') and participant_id in self.transcription_service.candidate_text_cache:
                final_text = self.transcription_service.candidate_text_cache.get(participant_id, "").strip()
                if final_text:  # Only finalize if there's actual text
                    # Get accumulated audio
                    final_audio = b""
                    if hasattr(self.transcription_service, 'candidate_audio_buffers') and participant_id in self.transcription_service.candidate_audio_buffers:
                        audio_array = self.transcription_service.candidate_audio_buffers.get(participant_id, np.array([]))
                        if len(audio_array) > 0:
                            # Convert float array to int16 bytes
                            audio_int16 = (audio_array * 32767).astype(np.int16)
                            final_audio = audio_int16.tobytes()

                    # Trigger sentence completion
                    await on_sentence_complete(final_text, final_audio)

                    # Clear the buffers manually since we're forcing finalization
                    if participant_id in self.transcription_service.candidate_text_cache:
                        self.transcription_service.candidate_text_cache[participant_id] = ""
                    if participant_id in self.transcription_service.candidate_audio_buffers:
                        self.transcription_service.candidate_audio_buffers[participant_id] = np.array([], dtype=np.float32)
                    if participant_id in self.transcription_service.silence_counters:
                        self.transcription_service.silence_counters[participant_id] = 0
                    if participant_id in self.transcription_service.sentence_finalized:
                        self.transcription_service.sentence_finalized[participant_id] = False

    except Exception as e:
        print(f"Error in _process_audio_chunk_vad: {e}")
        import traceback
        traceback.print_exc()
431
-
432
async def _process_translations_and_tts(self, message: Message, session):
    """Process translations and TTS for all session languages.

    Pipeline: (1) optionally schedule TTS for the original text,
    (2) schedule one translation task per non-original session language,
    (3) await translations in order, broadcasting 'translation_update'
    and scheduling TTS per translation, (4) await all TTS tasks and
    broadcast the audio in dual (WebM+WAV) format.  Runs as a background
    task; all failures are logged, never raised to the caller.
    """
    try:
        source_lang = message.original_language.name

        print(f"=== TRANSLATION/TTS PROCESSING START ===")
        print(f"Message ID: {message.id}")
        print(f"Original message: '{message.original_text}'")
        print(f"Original language: {message.original_language.name} ({message.original_language.code.value})")
        print(f"Session languages: {[f'{lang.name} ({lang.code.value})' for lang in session.languages]}")
        print(f"Session ID for verification: {session.id}")

        # Create a mapping to track which audio belongs to which message and
        # language: (language_code, text, task, is_original) tuples.
        audio_tasks = []

        # Check if TTS is enabled for this session
        if session.enable_tts:
            # First, generate TTS for the original message
            print(f"TTS: Generating TTS for original message in {message.original_language.code.value}: '{message.original_text}'")
            print(f"TTS Model: VITS ONNX (mutisya/vits-tts-onnx-fp32-{message.original_language.name.lower()}) - File: tts_service_onnx.py")
            original_audio_task = asyncio.create_task(
                self.tts_service.generate_speech_dual_format(message.original_text, message.original_language.code.value)
            )
            audio_tasks.append((
                message.original_language.code.value,
                message.original_text,
                original_audio_task,
                True  # is_original
            ))
        else:
            print(f"TTS: Skipping TTS generation (disabled for this session)")

        # Process translations for each language in the session.  Tasks are
        # created first so the translations run concurrently.
        print(f"Processing translations for {len(session.languages)} session languages...")
        print(f"Session languages: {[f'{lang.name}({lang.code.value})' for lang in session.languages]}")
        translation_tasks = []

        for language in session.languages:
            print(f"Checking language: {language.name} ({language.code.value})")
            if language.code != message.original_language.code:
                print(f"TRANSLATING: '{message.original_text}' from {source_lang} to {language.name}")
                print(f"Translation Model: mutisya/nllb_600m (NLLB-600M) - File: translation_service.py")

                # Create translation task
                translation_task = asyncio.create_task(
                    self.translation_service.translate_text(
                        message.original_text, source_lang, language.name
                    )
                )
                translation_tasks.append((language, translation_task))
            else:
                print(f"SKIPPING translation for {language.name} (same as original language)")

        print(f"Created {len(translation_tasks)} translation tasks for non-original languages")

        # Wait for all translations to complete (awaited in creation order;
        # each failure is isolated so one bad language doesn't stop the rest).
        for language, translation_task in translation_tasks:
            try:
                translated_text = await translation_task

                if translated_text:
                    print(f"TRANSLATION SUCCESS: '{translated_text}' for {language.name}")

                    message.translations[language.code.value] = translated_text

                    # Broadcast translation update to all clients
                    await self._broadcast_to_session(message.session_id, 'translation_update', {
                        'messageId': message.id,
                        'targetLanguage': language.code.value,
                        'translatedText': translated_text
                    })

                    # Check if TTS is enabled for this session
                    if session.enable_tts:
                        # Generate TTS for the translated text
                        print(f"TTS: Generating TTS for translation in {language.code.value}: '{translated_text}'")
                        print(f"TTS Model: VITS ONNX (mutisya/vits-tts-onnx-fp32-{language.name.lower()}) - File: tts_service_onnx.py")
                        tts_task = asyncio.create_task(
                            self.tts_service.generate_speech_dual_format(translated_text, language.code.value)
                        )
                        audio_tasks.append((
                            language.code.value,
                            translated_text,
                            tts_task,
                            False  # is_original
                        ))
                    else:
                        print(f"TTS: Skipping TTS generation for translation (disabled for this session)")
                else:
                    print(f"TRANSLATION FAILED: No translated text returned for {language.name}")
            except Exception as e:
                print(f"Translation error for {language.name}: {e}")

        # Wait for all TTS generation to complete and broadcast with proper alignment.
        # Each task resolves to a (webm_bytes, wav_bytes) pair; either may be empty.
        for language_code, text, audio_task, is_original in audio_tasks:
            try:
                audio_result = await audio_task

                if audio_result and (audio_result[0] or audio_result[1]):
                    webm_data, wav_data = audio_result
                    print(f"TTS: Audio generated successfully for {language_code}")
                    if webm_data:
                        print(f"TTS: WebM audio: {len(webm_data)} bytes")
                    if wav_data:
                        print(f"TTS: WAV audio: {len(wav_data)} bytes")
                    print(f"TTS: Text for {language_code}: '{text}'")

                    # Broadcast TTS audio with explicit message-text-audio alignment (dual format)
                    await self._broadcast_tts_audio_aligned_dual_format(
                        message.session_id,
                        message.id,
                        language_code,
                        text,
                        webm_data,
                        wav_data,
                        is_original
                    )
                else:
                    print(f"TTS: Failed to generate audio for {language_code}")
            except Exception as e:
                print(f"TTS generation error for {language_code}: {e}")

        print(f"=== TRANSLATION/TTS PROCESSING END ===")

    except Exception as e:
        print(f"Error in _process_translations_and_tts: {e}")
        import traceback
        traceback.print_exc()
560
-
561
async def _broadcast_to_session(self, session_id: str, event: str, data: dict, exclude_sid: str = None):
    """Emit *event* with *data* to every client of *session_id*.

    ``exclude_sid`` skips one client (typically the originator).  Unknown
    sessions are a no-op; per-client emit failures are logged and do not
    stop delivery to the remaining clients.
    """
    recipients = self.session_clients.get(session_id)
    if not recipients:
        return

    # Snapshot membership so concurrent joins/leaves can't break iteration.
    for client_sid in list(recipients):
        if client_sid == exclude_sid:
            continue
        try:
            await self.sio.emit(event, data, room=client_sid)
        except Exception as e:
            print(f"Error broadcasting {event} to client {client_sid}: {e}")
575
-
576
async def _broadcast_tts_audio_aligned(self, session_id: str, message_id: str,
                                       language_code: str, text: str, audio_data: bytes,
                                       is_original: bool = False):
    """Stream one TTS clip to every session client in 4 KiB chunks.

    Every chunk payload carries the message id, language and text it
    belongs to, so receivers can align audio with the right transcript.
    """
    try:
        if session_id not in self.session_clients:
            return

        print(f"TTS ALIGNED: Broadcasting audio for message {message_id}")
        print(f"TTS ALIGNED: Language: {language_code}")
        print(f"TTS ALIGNED: Text: '{text}'")
        print(f"TTS ALIGNED: Audio size: {len(audio_data)} bytes")
        print(f"TTS ALIGNED: Is original: {is_original}")

        chunk_size = 4096
        total_chunks = (len(audio_data) + chunk_size - 1) // chunk_size

        # Snapshot the membership to avoid concurrent-modification issues.
        for client_sid in list(self.session_clients[session_id]):
            try:
                for index in range(total_chunks):
                    start = index * chunk_size
                    piece = audio_data[start:start + chunk_size]

                    payload = {
                        'messageId': message_id,        # Explicit message ID
                        'languageCode': language_code,  # Language of THIS audio
                        'text': text,                   # Text that THIS audio represents
                        'isOriginal': is_original,      # Original vs translation
                        'chunk': list(piece),
                        'isLast': start + chunk_size >= len(audio_data),
                        'chunkIndex': index,            # Chunk ordering
                        'totalChunks': total_chunks
                    }

                    await self.sio.emit('tts_audio_chunk', payload, room=client_sid)

                    # Pace the stream so the client isn't overwhelmed.
                    await asyncio.sleep(0.01)

                print(f"TTS ALIGNED: Successfully sent aligned audio to participant {client_sid}")
            except Exception as e:
                print(f"TTS ALIGNED: Error sending audio to participant {client_sid}: {e}")

    except Exception as e:
        print(f"TTS ALIGNED: Error broadcasting aligned audio: {e}")
623
-
624
async def _broadcast_tts_audio_aligned_dual_format(self, session_id: str, message_id: str,
                                                   language_code: str, text: str, webm_data: bytes,
                                                   wav_data: bytes, is_original: bool = False):
    """Broadcast TTS audio with both WebM and WAV formats for cross-platform compatibility.

    WebM is the primary stream (web clients); a parallel WAV chunk is
    attached for clients (e.g. Android) that cannot play WebM.  Either
    format may be empty; at least one must be present.
    """
    try:
        if session_id not in self.session_clients:
            return

        print(f"TTS DUAL FORMAT: Broadcasting audio for message {message_id}")
        print(f"TTS DUAL FORMAT: Language: {language_code}")
        print(f"TTS DUAL FORMAT: Text: '{text}'")
        if webm_data:
            print(f"TTS DUAL FORMAT: WebM size: {len(webm_data)} bytes")
        if wav_data:
            print(f"TTS DUAL FORMAT: WAV size: {len(wav_data)} bytes")
        print(f"TTS DUAL FORMAT: Is original: {is_original}")

        # Create a copy of the set to avoid concurrent modification
        client_sids = list(self.session_clients[session_id])

        # Use WebM data for chunking (primary format for web clients)
        primary_audio_data = webm_data if webm_data else wav_data
        if not primary_audio_data:
            print("TTS DUAL FORMAT: No audio data available")
            return

        # Send audio data in chunks to all participants with dual format support.
        # NOTE(review): WAV chunks reuse the WebM stream's byte offsets, and the
        # loop ends at len(primary_audio_data) — if wav_data is longer than
        # webm_data its tail is never sent, and chunk boundaries between the two
        # formats do not correspond to the same audio time. Confirm receivers
        # tolerate this before relying on the WAV stream.
        chunk_size = 4096
        for sid in client_sids:
            try:
                for i in range(0, len(primary_audio_data), chunk_size):
                    chunk = primary_audio_data[i:i + chunk_size]
                    is_last_chunk = i + chunk_size >= len(primary_audio_data)

                    # Prepare WAV chunk if available
                    wav_chunk = None
                    if wav_data and i < len(wav_data):
                        wav_end = min(i + chunk_size, len(wav_data))
                        wav_chunk = wav_data[i:wav_end]

                    chunk_data = {
                        'messageId': message_id,  # Explicit message ID
                        'languageCode': language_code,  # Language of THIS audio
                        'text': text,  # Text that THIS audio represents
                        'isOriginal': is_original,  # Whether this is original or translation
                        'chunk': list(chunk),  # WebM audio chunk (for web clients)
                        'wavChunk': list(wav_chunk) if wav_chunk else None,  # WAV audio chunk (for Android clients)
                        'isLast': is_last_chunk,
                        'chunkIndex': i // chunk_size,  # Chunk ordering
                        'totalChunks': (len(primary_audio_data) + chunk_size - 1) // chunk_size,
                        'format': 'webm',  # Primary format
                        'wavFormat': 'wav' if wav_chunk else None  # Secondary format available
                    }

                    await self.sio.emit('tts_audio_chunk', chunk_data, room=sid)

                    # Small delay to prevent overwhelming
                    await asyncio.sleep(0.01)

                print(f"TTS DUAL FORMAT: Successfully sent dual format audio to participant {sid}")
            except Exception as e:
                print(f"TTS DUAL FORMAT: Error sending audio to participant {sid}: {e}")

    except Exception as e:
        print(f"TTS DUAL FORMAT: Error broadcasting dual format audio: {e}")
689
-
690
async def _broadcast_tts_audio_to_all_participants(self, session_id: str, language_code: str,
                                                   audio_data: bytes, message_id: str, text: str):
    """Legacy entry point kept for compatibility; delegates to the aligned broadcast."""
    await self._broadcast_tts_audio_aligned(
        session_id, message_id, language_code, text, audio_data, False
    )
696
-
697
async def _broadcast_audio_to_language_participants(self, session_id: str, language_code: str,
                                                    audio_data: bytes, message_id: str):
    """Broadcast audio only to participants listening in *language_code* (legacy method)."""
    try:
        session = await self.session_manager.get_session(session_id)
        if not session:
            return

        chunk_size = 4096
        for participant in session.participants:
            if participant.language.code.value != language_code:
                continue

            # Reverse-lookup the first connected sid for this participant.
            participant_sid = next(
                (sid for sid, pid in self.client_participants.items() if pid == participant.id),
                None
            )
            if not participant_sid:
                continue

            print(f"TTS: Broadcasting audio to participant {participant.name} in {language_code}")
            # Stream the clip in fixed-size chunks, pacing each emit.
            for offset in range(0, len(audio_data), chunk_size):
                await self.sio.emit('tts_audio_chunk', {
                    'messageId': message_id,
                    'chunk': list(audio_data[offset:offset + chunk_size]),
                    'isLast': offset + chunk_size >= len(audio_data)
                }, room=participant_sid)

                # Small delay to prevent overwhelming
                await asyncio.sleep(0.01)

    except Exception as e:
        print(f"TTS: Error broadcasting audio: {e}")
733
-
734
async def _cleanup_client(self, sid: str):
    """Remove all state associated with a disconnecting/leaving client.

    Drops the participant from its session, clears per-participant
    transcription buffers and message tracking, and — when the session's
    last client leaves — discards the session's processed-message history.
    """
    try:
        participant_id = self.client_participants.get(sid)
        session_id = self.client_sessions.get(sid)

        if participant_id:
            # Remove the participant and any per-participant processing state.
            await self.session_manager.remove_participant(participant_id)
            self.transcription_service.clear_participant_buffers(participant_id)
            self.participant_current_message.pop(participant_id, None)
            self.client_participants.pop(sid, None)

        if session_id:
            members = self.session_clients.get(session_id)
            if members is not None:
                members.discard(sid)
                if not members:
                    # Last client gone: drop the roster and purge its
                    # processed-message records to avoid a memory leak.
                    del self.session_clients[session_id]
                    self._cleanup_session_processed_messages(session_id)
            self.client_sessions.pop(sid, None)

    except Exception as e:
        print(f"Error cleaning up client {sid}: {e}")
766
-
767
def _cleanup_session_processed_messages(self, session_id: str):
    """Drop processed-message records belonging to *session_id* to prevent memory leaks."""
    try:
        # Collect ids first so we don't mutate the set while scanning it.
        stale_ids = [
            mid for mid in list(self.processed_messages)
            if mid in self.messages and self.messages[mid].session_id == session_id
        ]

        for mid in stale_ids:
            self.processed_messages.discard(mid)
            self.messages.pop(mid, None)

        print(f"Cleaned up {len(stale_ids)} processed messages for session {session_id}")
    except Exception as e:
        print(f"Error cleaning up session processed messages: {e}")
784
-
785
- async def _emit_error(self, sid: str, message: str):
786
- """Emit error message to specific client"""
787
- try:
788
- await self.sio.emit('join_error', message, room=sid)
789
- except Exception as e:
790
- print(f"Error emitting error to {sid}: {e}")
791
-
792
- async def handle_update_participant_language(self, sid: str, data: dict):
793
- """Handle participant language update (affects speech recognition)"""
794
- try:
795
- session_id = data.get('sessionId')
796
- participant_id = data.get('participantId')
797
- language_code = data.get('language')
798
-
799
- print(f"=== UPDATE PARTICIPANT LANGUAGE ===")
800
- print(f"Session ID: {session_id}")
801
- print(f"Participant ID: {participant_id}")
802
- print(f"New Language: {language_code}")
803
-
804
- if not all([session_id, participant_id, language_code]):
805
- await self._emit_error(sid, "Missing required fields")
806
- return
807
-
808
- # Validate language code
809
- try:
810
- from app.models import LanguageCode
811
- lang_enum = LanguageCode(language_code)
812
- print(f"Language code validated: {lang_enum}")
813
- except ValueError:
814
- await self._emit_error(sid, f"Invalid language code: {language_code}")
815
- return
816
-
817
- # Update participant's language in session
818
- session = await self.session_manager.get_session(session_id)
819
- if session:
820
- for participant in session.participants:
821
- if participant.id == participant_id:
822
- # Update participant's language using LANGUAGE_MAP for complete Language object
823
- if lang_enum in LANGUAGE_MAP:
824
- participant.language = LANGUAGE_MAP[lang_enum]
825
- print(f"Updated participant {participant.name} language to {lang_enum.value} ({participant.language.display_name})")
826
- else:
827
- print(f"Warning: Language {lang_enum.value} not found in LANGUAGE_MAP, using fallback")
828
- from app.models import Language
829
- participant.language = Language(code=lang_enum, name=lang_enum.value, display_name=lang_enum.value)
830
-
831
- # Notify all clients in session
832
- await self._broadcast_to_session(session_id, 'participant_language_updated', {
833
- 'participantId': participant_id,
834
- 'language': language_code
835
- })
836
- break
837
-
838
- print(f"=== UPDATE PARTICIPANT LANGUAGE COMPLETE ===")
839
-
840
- except Exception as e:
841
- print(f"Error in handle_update_participant_language: {e}")
842
- import traceback
843
- traceback.print_exc()
844
- await self._emit_error(sid, "Failed to update participant language")
845
-
846
- async def handle_update_session_languages(self, sid: str, data: dict):
847
- """Handle session languages update (affects translation targets)"""
848
- try:
849
- session_id = data.get('sessionId')
850
- languages = data.get('languages', [])
851
-
852
- print(f"=== UPDATE SESSION LANGUAGES (REPLACE MODE) ===")
853
- print(f"Session ID: {session_id}")
854
- print(f"New Languages: {languages}")
855
-
856
- if not session_id or not languages:
857
- await self._emit_error(sid, "Missing required fields")
858
- return
859
-
860
- # Get current session for comparison
861
- session = await self.session_manager.get_session(session_id)
862
- if not session:
863
- await self._emit_error(sid, "Session not found")
864
- return
865
-
866
- current_languages = [lang.code.value for lang in session.languages]
867
- print(f"Before update - Session languages: {current_languages}")
868
-
869
- # Validate all language codes and create Language objects
870
- validated_languages = []
871
- try:
872
- from app.models import Language, LanguageCode
873
- from app.services.session_manager import LANGUAGE_MAP
874
-
875
- for lang_code in languages:
876
- lang_enum = LanguageCode(lang_code)
877
- language = LANGUAGE_MAP[lang_enum]
878
- validated_languages.append(language)
879
- print(f"Validated language: {lang_code} -> {language.name}")
880
-
881
- except ValueError as e:
882
- await self._emit_error(sid, f"Invalid language code: {e}")
883
- return
884
-
885
- # REPLACE session languages (not add to them)
886
- session.languages = validated_languages
887
- new_languages = [lang.code.value for lang in session.languages]
888
- print(f"After update - Session languages: {new_languages}")
889
-
890
- # Verify the session manager has the updated languages
891
- verification_session = await self.session_manager.get_session(session_id)
892
- if verification_session:
893
- verification_languages = [lang.code.value for lang in verification_session.languages]
894
- print(f"Verification - Session manager languages: {verification_languages}")
895
-
896
- # Notify all clients in session about the update
897
- await self._broadcast_to_session(session_id, 'session_languages_updated', {
898
- 'sessionId': session_id,
899
- 'languages': new_languages,
900
- 'previous': current_languages
901
- })
902
-
903
- print(f"=== UPDATE SESSION LANGUAGES COMPLETE ===")
904
-
905
- except Exception as e:
906
- print(f"Error in handle_update_session_languages: {e}")
907
- import traceback
908
- traceback.print_exc()
909
- await self._emit_error(sid, "Failed to update session languages")