Test_Voice / config.py
ducnguyen1978's picture
Upload config.py
0a34039 verified
#!/usr/bin/env python3
"""
Configuration file for Translation AI Agent
"""
import os
from typing import Dict, List
class Config:
    """Configuration settings for the Translation AI Agent.

    Everything is stored as class attributes, so values can be read either
    from the class itself or from an instance. Values that come from
    environment variables are read once, when the class body is executed.
    """

    # ------------------------------------------------------------------
    # Model configuration (default model ids)
    # ------------------------------------------------------------------
    TRANSLATION_MODEL = "facebook/nllb-200-distilled-600M"
    SPEECH_RECOGNITION_MODEL = "openai/whisper-base"
    TEXT_TO_SPEECH_MODEL = "microsoft/speecht5_tts"

    # Alternative models for different performance/quality tradeoffs.
    # Note that the key sets differ per task ("base" only exists for
    # speech recognition, "large" only for translation).
    MODEL_ALTERNATIVES = {
        "translation": {
            "small": "facebook/nllb-200-distilled-600M",
            "medium": "facebook/nllb-200-1.3B",
            "large": "facebook/nllb-200-3.3B",
        },
        "speech_recognition": {
            "tiny": "openai/whisper-tiny",
            "base": "openai/whisper-base",
            "small": "openai/whisper-small",
            "medium": "openai/whisper-medium",
        },
        "tts": {
            "speecht5": "microsoft/speecht5_tts",
            "bark": "suno/bark",
        },
    }

    # ------------------------------------------------------------------
    # Supported languages: language code -> display name
    # ------------------------------------------------------------------
    SUPPORTED_LANGUAGES = {
        'en': 'English',
        'es': 'Spanish',
        'fr': 'French',
        'de': 'German',
        'it': 'Italian',
        'pt': 'Portuguese',
        'ru': 'Russian',
        'ja': 'Japanese',
        'ko': 'Korean',
        'zh': 'Chinese',
        'ar': 'Arabic',
        'hi': 'Hindi',
        'vi': 'Vietnamese',
        'th': 'Thai',
        'tr': 'Turkish',
        'nl': 'Dutch',
        'pl': 'Polish',
        'sv': 'Swedish',
        'da': 'Danish',
        'no': 'Norwegian',
    }

    # ------------------------------------------------------------------
    # Language detection keywords: language code -> common words.
    # Only a subset of SUPPORTED_LANGUAGES has an entry here.
    # Every list holds 10 *distinct* words; the 'fr' and 'ar' lists used to
    # repeat one word each ('et', 'التي'), and the repeats were replaced
    # with another common word so no keyword is listed twice.
    # ------------------------------------------------------------------
    LANGUAGE_KEYWORDS = {
        'en': ['the', 'and', 'is', 'in', 'to', 'of', 'a', 'that', 'it', 'with'],
        'es': ['el', 'la', 'de', 'que', 'y', 'en', 'un', 'es', 'se', 'no'],
        'fr': ['le', 'de', 'et', 'à', 'un', 'il', 'être', 'les', 'en', 'avoir'],
        'de': ['der', 'die', 'und', 'in', 'den', 'von', 'zu', 'das', 'mit', 'sich'],
        'it': ['di', 'a', 'da', 'in', 'con', 'su', 'per', 'tra', 'fra', 'la'],
        'pt': ['de', 'a', 'o', 'que', 'e', 'do', 'da', 'em', 'um', 'para'],
        'ru': ['в', 'и', 'не', 'на', 'я', 'с', 'что', 'а', 'по', 'это'],
        'ja': ['の', 'に', 'は', 'を', 'た', 'が', 'で', 'て', 'と', 'し'],
        'ko': ['이', '가', '을', '를', '에', '의', '와', '과', '로', '으로'],
        'zh': ['的', '一', '是', '在', '不', '了', '有', '和', '人', '这'],
        'ar': ['في', 'من', 'إلى', 'على', 'أن', 'هذا', 'هذه', 'التي', 'الذي', 'كان'],
        'hi': ['का', 'के', 'की', 'में', 'है', 'और', 'को', 'से', 'पर', 'या'],
        'vi': ['và', 'của', 'có', 'trong', 'một', 'là', 'với', 'này', 'để', 'được'],
    }

    # ------------------------------------------------------------------
    # Audio configuration
    # ------------------------------------------------------------------
    AUDIO_SAMPLE_RATE = 16000
    AUDIO_CHUNK_DURATION = 30  # seconds
    MAX_AUDIO_DURATION = 300  # seconds (5 minutes)
    SUPPORTED_AUDIO_FORMATS = ['.wav', '.mp3', '.m4a', '.flac', '.ogg']

    # ------------------------------------------------------------------
    # Model performance settings
    # ------------------------------------------------------------------
    MAX_LENGTH = 512
    BATCH_SIZE = 1
    DEVICE_MAP = "auto"
    TORCH_DTYPE = "float16"  # stored as a string, not a torch dtype object

    # ------------------------------------------------------------------
    # Environment variables (read once, at class-definition time)
    # ------------------------------------------------------------------
    HUGGINGFACE_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN")  # None when unset
    CUDA_VISIBLE_DEVICES = os.getenv("CUDA_VISIBLE_DEVICES", "0")
    TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE", "./cache")

    # ------------------------------------------------------------------
    # Gradio configuration
    # ------------------------------------------------------------------
    # PORT must be an integer string; int() raises ValueError otherwise.
    # Boolean flags are true only when the variable equals "true"
    # (case-insensitive); any other value is treated as False.
    # NOTE(review): "share" defaults to True while "auth" is None and the
    # server binds to 0.0.0.0 — presumably this requests a public share link
    # without authentication; confirm that is intended for deployment.
    GRADIO_CONFIG = {
        "server_name": "0.0.0.0",
        "server_port": int(os.getenv("PORT", "7860")),
        "share": os.getenv("GRADIO_SHARE", "True").lower() == "true",
        "debug": os.getenv("DEBUG", "False").lower() == "true",
        "show_error": True,
        "quiet": False,
        "inbrowser": False,
        "auth": None,  # Can be set to ("username", "password") for authentication
    }

    # Theme configuration
    GRADIO_THEME = "soft"  # Options: default, soft, monochrome, glass, etc.

    # Custom CSS. The lines are kept flush left so the string content stays
    # exactly as it was; CSS itself is not sensitive to this whitespace.
    CUSTOM_CSS = """
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
font-family: 'Inter', sans-serif;
}
.header {
text-align: center;
padding: 25px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border-radius: 15px;
margin-bottom: 20px;
box-shadow: 0 8px 32px rgba(102, 126, 234, 0.3);
}
.tab-nav button {
font-weight: 600;
border-radius: 10px;
margin: 5px;
transition: all 0.3s ease;
}
.tab-nav button:hover {
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(0,0,0,0.15);
}
.gr-button {
border-radius: 8px;
font-weight: 600;
transition: all 0.3s ease;
}
.gr-button:hover {
transform: translateY(-1px);
box-shadow: 0 4px 12px rgba(0,0,0,0.15);
}
.gr-textbox, .gr-dropdown {
border-radius: 8px;
border: 2px solid #e5e7eb;
transition: all 0.3s ease;
}
.gr-textbox:focus, .gr-dropdown:focus {
border-color: #667eea;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
}
"""

    # ------------------------------------------------------------------
    # Logging configuration (dict in the logging.config.dictConfig layout)
    # ------------------------------------------------------------------
    LOGGING_CONFIG = {
        'version': 1,
        'disable_existing_loggers': False,
        'formatters': {
            'standard': {
                'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
            },
        },
        'handlers': {
            'default': {
                'level': 'INFO',
                'formatter': 'standard',
                'class': 'logging.StreamHandler',
            },
        },
        'loggers': {
            # The empty name configures the root logger.
            '': {
                'handlers': ['default'],
                'level': 'INFO',
                'propagate': False,
            }
        },
    }

    # Rate limiting (for production deployment)
    RATE_LIMIT = {
        "calls_per_minute": 60,
        "calls_per_hour": 1000,
    }

    # Feature flags
    FEATURES = {
        "enable_live_translation": True,
        "enable_batch_processing": True,
        "enable_custom_models": False,
        "enable_analytics": True,
        "enable_caching": True,
    }

    # Cache configuration
    CACHE_CONFIG = {
        "translation_cache_size": 1000,
        "model_cache_dir": "./cache/models",
        "audio_cache_dir": "./cache/audio",
        "cache_ttl": 3600,  # 1 hour
    }

    @classmethod
    def get_model_config(cls, model_size: str = "base") -> Dict[str, str]:
        """Return the model ids to use for a given size preference.

        Args:
            model_size: Key looked up in the "translation" and
                "speech_recognition" tables of ``MODEL_ALTERNATIVES``.
                A key missing from a table falls back to that task's
                default model. The default "base" only exists for speech
                recognition, so translation then uses ``TRANSLATION_MODEL``.

        Returns:
            A dict with the keys "translation", "speech_recognition" and
            "tts". The "tts" entry does not depend on ``model_size``.
        """
        alternatives = cls.MODEL_ALTERNATIVES
        return {
            "translation": alternatives["translation"].get(
                model_size, cls.TRANSLATION_MODEL
            ),
            "speech_recognition": alternatives["speech_recognition"].get(
                model_size, cls.SPEECH_RECOGNITION_MODEL
            ),
            # Use the class-level default (like the two entries above) so an
            # overridden TEXT_TO_SPEECH_MODEL is honoured instead of being
            # bypassed by a hard-coded table lookup.
            "tts": cls.TEXT_TO_SPEECH_MODEL,
        }

    @classmethod
    def get_language_pairs(cls) -> List[tuple]:
        """Return every ordered (source, target) pair of distinct languages.

        Pairs are built from the keys of ``SUPPORTED_LANGUAGES`` in their
        definition order; a language is never paired with itself.
        """
        codes = list(cls.SUPPORTED_LANGUAGES)
        return [(src, tgt) for src in codes for tgt in codes if src != tgt]

    @classmethod
    def validate_language(cls, lang_code: str) -> bool:
        """Return True if ``lang_code`` is a key of ``SUPPORTED_LANGUAGES``."""
        return lang_code in cls.SUPPORTED_LANGUAGES

    @classmethod
    def get_language_name(cls, lang_code: str) -> str:
        """Return the display name for ``lang_code``.

        Unknown codes are returned unchanged rather than raising.
        """
        return cls.SUPPORTED_LANGUAGES.get(lang_code, lang_code)
# Module-level Config instance, exposed under the name `config`
config: Config = Config()