Spaces:
Sleeping
Sleeping
| """ | |
| Centralized Configuration Module | |
| This module contains all configuration settings for the AI Image Caption Generator. | |
| Follows the single source of truth principle for easy maintenance and deployment. | |
| """ | |
| import os | |
| from pathlib import Path | |
| from typing import Dict, List, Final | |
| from dataclasses import dataclass | |
| from dotenv import load_dotenv | |
| # Load environment variables | |
| load_dotenv() | |
| # ============================================================================ | |
| # PROJECT PATHS | |
| # ============================================================================ | |
| PROJECT_ROOT: Final[Path] = Path(__file__).parent | |
| CACHE_DIR: Final[Path] = PROJECT_ROOT / "cache" | |
| MODEL_CACHE_DIR: Final[Path] = CACHE_DIR / "models" | |
| ANALYTICS_FILE: Final[Path] = CACHE_DIR / "analytics.json" | |
| STATIC_DIR: Final[Path] = PROJECT_ROOT / "static" | |
| # Create directories if they don't exist | |
| for directory in [CACHE_DIR, MODEL_CACHE_DIR, STATIC_DIR]: | |
| directory.mkdir(parents=True, exist_ok=True) | |
| # ============================================================================ | |
| # MODEL CONFIGURATION | |
| # ============================================================================ | |
| class ModelConfig: | |
| """Configuration for caption generation models""" | |
| # BLIP Model | |
| BLIP_MODEL_NAME: str = "Salesforce/blip-image-captioning-base" | |
| BLIP_MAX_LENGTH: int = 50 | |
| BLIP_NUM_BEAMS: int = 3 | |
| # GIT Model | |
| GIT_MODEL_NAME: str = "microsoft/git-large-coco" | |
| GIT_MAX_LENGTH: int = 50 | |
| GIT_NUM_BEAMS: int = 3 | |
| # Device Configuration | |
| DEVICE: str = "cuda" # Will auto-fallback to CPU if CUDA unavailable | |
| # Memory Management | |
| MODEL_CACHE_DIR: Path = MODEL_CACHE_DIR | |
| LOW_MEMORY_MODE: bool = False # Enable for systems with <8GB GPU memory | |
| # ============================================================================ | |
| # IMAGE PROCESSING CONFIGURATION | |
| # ============================================================================ | |
| class ImageConfig: | |
| """Configuration for image validation and preprocessing""" | |
| # Size Constraints | |
| MAX_FILE_SIZE_MB: int = 5 | |
| MAX_FILE_SIZE_BYTES: int = MAX_FILE_SIZE_MB * 1024 * 1024 | |
| MAX_DIMENSION: int = 512 # Max width/height for model input | |
| MIN_DIMENSION: int = 32 # Minimum acceptable dimension | |
| # Supported Formats | |
| ALLOWED_FORMATS: tuple = ("JPEG", "PNG", "WEBP", "JPG") | |
| ALLOWED_EXTENSIONS: tuple = (".jpg", ".jpeg", ".png", ".webp") | |
| # Processing | |
| RESIZE_QUALITY: int = 95 # JPEG quality after resize | |
| MAINTAIN_ASPECT_RATIO: bool = True | |
| # ============================================================================ | |
| # GROQ API CONFIGURATION | |
| # ============================================================================ | |
| class GroqConfig: | |
| """Configuration for Groq API styling""" | |
| # API Settings | |
| API_KEY: str = os.getenv("GROQ_API_KEY", "") | |
| MODEL_NAME: str = "llama-3.1-8b-instant" | |
| # Request Parameters | |
| MAX_TOKENS: int = 150 | |
| TEMPERATURE: float = 0.7 | |
| TOP_P: float = 0.9 | |
| TIMEOUT_SECONDS: int = 10 | |
| # Retry Logic | |
| MAX_RETRIES: int = 3 | |
| RETRY_DELAY_SECONDS: float = 1.0 | |
| # Rate Limiting | |
| REQUESTS_PER_MINUTE: int = 30 | |
| # ============================================================================ | |
| # STYLE CONFIGURATION | |
| # ============================================================================ | |
| class StyleConfig: | |
| """Configuration for caption styling options""" | |
| STYLES: Final[Dict[str, str]] = { | |
| "None": "Keep the original caption without any modifications.", | |
| "Professional": "Rewrite this image caption in a professional, business-appropriate tone. Make it clear, formal, and suitable for corporate presentations or reports.", | |
| "Creative": "Transform this caption into a creative, artistic, and imaginative description. Use vivid language and engaging expressions.", | |
| "Social Media": "Rewrite this caption for social media platforms. Make it engaging, add relevant emojis, and make it shareable. Keep it under 280 characters.", | |
| "Technical": "Rewrite this caption with technical precision and detailed analysis. Focus on specific elements, composition, and visual characteristics." | |
| } | |
| DEFAULT_STYLE: Final[str] = "Professional" | |
| # Fallback templates when API fails | |
| FALLBACK_TEMPLATES: Final[Dict[str, str]] = { | |
| "Professional": "Image Description: {caption}", | |
| "Creative": "✨ {caption} ✨", | |
| "Social Media": "📸 {caption} #AI #ImageCaption", | |
| "Technical": "Visual Analysis: {caption}", | |
| "None": "{caption}" | |
| } | |
| # ============================================================================ | |
| # CACHE CONFIGURATION | |
| # ============================================================================ | |
| class CacheConfig: | |
| """Configuration for caching system""" | |
| # Cache Settings | |
| MAX_CACHE_SIZE: int = 100 # Maximum number of cached items | |
| CACHE_TTL_SECONDS: int = 3600 # Time to live: 1 hour | |
| # Cache Keys | |
| ENABLE_CAPTION_CACHE: bool = True | |
| CACHE_KEY_ALGO: str = "md5" # Hashing algorithm for cache keys | |
| # ============================================================================ | |
| # ANALYTICS CONFIGURATION | |
| # ============================================================================ | |
| class AnalyticsConfig: | |
| """Configuration for usage analytics""" | |
| # Storage | |
| ANALYTICS_FILE: Path = ANALYTICS_FILE | |
| SAVE_INTERVAL_SECONDS: int = 30 # Auto-save every 30 seconds | |
| # Metrics to Track | |
| TRACK_PROCESSING_TIME: bool = True | |
| TRACK_STYLE_USAGE: bool = True | |
| TRACK_MODEL_USAGE: bool = True | |
| TRACK_ERROR_RATE: bool = True | |
| # ============================================================================ | |
| # GRADIO UI CONFIGURATION | |
| # ============================================================================ | |
| class UIConfig: | |
| """Configuration for Gradio interface""" | |
| # App Metadata | |
| TITLE: str = "🖼️ AI Image Caption Generator" | |
| DESCRIPTION: str = """ | |
| Generate professional image captions using state-of-the-art AI models. | |
| Upload an image and choose your preferred style - get instant captions from both BLIP and GIT models. | |
| """ | |
| # UI Settings | |
| THEME: str = "soft" # Gradio theme | |
| SHOW_API: bool = False | |
| SHOW_ERROR: bool = True | |
| # Component Settings | |
| IMAGE_HEIGHT: int = 400 | |
| MAX_QUEUE_SIZE: int = 10 | |
| # Example Images | |
| EXAMPLES_DIR: Path = STATIC_DIR / "images" / "examples" | |
| # ============================================================================ | |
| # LOGGING CONFIGURATION | |
| # ============================================================================ | |
| class LogConfig: | |
| """Configuration for logging""" | |
| LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO") | |
| LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" | |
| LOG_DATE_FORMAT: str = "%Y-%m-%d %H:%M:%S" | |
| # ============================================================================ | |
| # PERFORMANCE CONFIGURATION | |
| # ============================================================================ | |
| class PerformanceConfig: | |
| """Configuration for performance optimization""" | |
| # Processing Timeouts | |
| MAX_PROCESSING_TIME_SECONDS: int = 30 | |
| # Model Loading | |
| LAZY_LOAD_MODELS: bool = False # Load models on first use vs startup | |
| # Batch Processing (future feature) | |
| ENABLE_BATCH_PROCESSING: bool = False | |
| MAX_BATCH_SIZE: int = 1 | |
| # ============================================================================ | |
| # INSTANTIATE CONFIGURATIONS | |
| # ============================================================================ | |
| # Create singleton instances | |
| model_config = ModelConfig() | |
| image_config = ImageConfig() | |
| groq_config = GroqConfig() | |
| style_config = StyleConfig() | |
| cache_config = CacheConfig() | |
| analytics_config = AnalyticsConfig() | |
| ui_config = UIConfig() | |
| log_config = LogConfig() | |
| performance_config = PerformanceConfig() | |
| # ============================================================================ | |
| # VALIDATION | |
| # ============================================================================ | |
| def validate_config() -> tuple[bool, list[str]]: | |
| """ | |
| Validate all configuration settings | |
| Returns: | |
| tuple: (is_valid, list_of_errors) | |
| """ | |
| errors = [] | |
| # Check Groq API Key | |
| if not groq_config.API_KEY: | |
| errors.append("GROQ_API_KEY not found in environment variables") | |
| # Check required directories | |
| required_dirs = [CACHE_DIR, MODEL_CACHE_DIR] | |
| for directory in required_dirs: | |
| if not directory.exists(): | |
| errors.append(f"Required directory not found: {directory}") | |
| # Validate image constraints | |
| if image_config.MAX_DIMENSION < image_config.MIN_DIMENSION: | |
| errors.append("MAX_DIMENSION must be greater than MIN_DIMENSION") | |
| # Validate style options | |
| if not style_config.STYLES: | |
| errors.append("No style options configured") | |
| if style_config.DEFAULT_STYLE not in style_config.STYLES: | |
| errors.append(f"Default style '{style_config.DEFAULT_STYLE}' not in available styles") | |
| return len(errors) == 0, errors | |
| # ============================================================================ | |
| # CONFIGURATION SUMMARY | |
| # ============================================================================ | |
| def print_config_summary() -> None: | |
| """Print configuration summary for debugging""" | |
| print("=" * 60) | |
| print("AI IMAGE CAPTION GENERATOR - CONFIGURATION SUMMARY") | |
| print("=" * 60) | |
| print(f"Project Root: {PROJECT_ROOT}") | |
| print(f"Cache Directory: {CACHE_DIR}") | |
| print(f"Model Cache: {MODEL_CACHE_DIR}") | |
| print(f"\nModels:") | |
| print(f" - BLIP: {model_config.BLIP_MODEL_NAME}") | |
| print(f" - GIT: {model_config.GIT_MODEL_NAME}") | |
| print(f" - Device: {model_config.DEVICE}") | |
| print(f"\nGroq API:") | |
| print(f" - Model: {groq_config.MODEL_NAME}") | |
| print(f" - API Key: {'✓ Configured' if groq_config.API_KEY else '✗ Missing'}") | |
| print(f"\nImage Processing:") | |
| print(f" - Max Size: {image_config.MAX_FILE_SIZE_MB}MB") | |
| print(f" - Max Dimension: {image_config.MAX_DIMENSION}px") | |
| print(f" - Formats: {', '.join(image_config.ALLOWED_FORMATS)}") | |
| print(f"\nStyle Options: {len(style_config.STYLES)}") | |
| for style in style_config.STYLES.keys(): | |
| print(f" - {style}") | |
| print(f"\nCache: {cache_config.MAX_CACHE_SIZE} items") | |
| print(f"Analytics: {analytics_config.ANALYTICS_FILE}") | |
| print("=" * 60) | |
| # Validate configuration | |
| is_valid, errors = validate_config() | |
| if not is_valid: | |
| print("\n⚠️ CONFIGURATION ERRORS:") | |
| for error in errors: | |
| print(f" - {error}") | |
| print("=" * 60) | |
| else: | |
| print("\n✓ Configuration validated successfully") | |
| print("=" * 60) | |
| if __name__ == "__main__": | |
| # Run configuration validation when executed directly | |
| print_config_summary() |