Spaces:

CXM06
/

ai-image-caption-generation

Sleeping

App Files Files Community

ai-image-caption-generation / config.py

CXM06

First commit - Individual test scripts present

d0c8d86 7 months ago

raw

history blame contribute delete

11.2 kB

	"""
	Centralized Configuration Module

	This module contains all configuration settings for the AI Image Caption Generator.
	Follows the single source of truth principle for easy maintenance and deployment.
	"""

	import os
	from dataclasses import dataclass, field
	from pathlib import Path
	from typing import Dict, Final, List

	from dotenv import load_dotenv

	# Load environment variables
	# This call must stay above the config classes: GroqConfig and LogConfig read
	# os.getenv(...) inside their class bodies, i.e. once, while this module is
	# being imported.
	load_dotenv()

	# ============================================================================
	# PROJECT PATHS
	# ============================================================================

	# Every path below is derived from the directory that contains this file.
	PROJECT_ROOT: Final[Path] = Path(__file__).parent
	CACHE_DIR: Final[Path] = PROJECT_ROOT.joinpath("cache")
	MODEL_CACHE_DIR: Final[Path] = CACHE_DIR.joinpath("models")
	ANALYTICS_FILE: Final[Path] = CACHE_DIR.joinpath("analytics.json")
	STATIC_DIR: Final[Path] = PROJECT_ROOT.joinpath("static")

	# Import-time side effect: make sure the working directories are present.
	# exist_ok=True makes repeated imports harmless.
	for directory in (CACHE_DIR, MODEL_CACHE_DIR, STATIC_DIR):
	    directory.mkdir(parents=True, exist_ok=True)


	# ============================================================================
	# MODEL CONFIGURATION
	# ============================================================================

	@dataclass(frozen=True)
	class ModelConfig:
	    """Immutable settings for the two captioning models (BLIP and GIT)."""

	    # --- BLIP: model identifier plus generation limits ---
	    BLIP_MODEL_NAME: str = "Salesforce/blip-image-captioning-base"
	    BLIP_MAX_LENGTH: int = 50
	    BLIP_NUM_BEAMS: int = 3

	    # --- GIT: model identifier plus generation limits ---
	    GIT_MODEL_NAME: str = "microsoft/git-large-coco"
	    GIT_MAX_LENGTH: int = 50
	    GIT_NUM_BEAMS: int = 3

	    # --- Device ---
	    # Preferred device string. NOTE(review): a CPU fallback when CUDA is
	    # unavailable is not implemented in this module; presumably the code that
	    # loads the models handles it -- confirm.
	    DEVICE: str = "cuda"

	    # --- Memory management ---
	    # The default is taken from the module-level MODEL_CACHE_DIR path.
	    MODEL_CACHE_DIR: Path = MODEL_CACHE_DIR
	    # Meant to be switched on for systems with less than 8GB of GPU memory.
	    LOW_MEMORY_MODE: bool = False


	# ============================================================================
	# IMAGE PROCESSING CONFIGURATION
	# ============================================================================

	@dataclass(frozen=True)
	class ImageConfig:
	    """Immutable limits and options for validating and preprocessing images."""

	    # --- Size constraints ---
	    MAX_FILE_SIZE_MB: int = 5
	    # Evaluated once in the class body from the default above; it is NOT
	    # recomputed when an instance is created with a different MAX_FILE_SIZE_MB.
	    MAX_FILE_SIZE_BYTES: int = MAX_FILE_SIZE_MB * 1024 ** 2
	    MAX_DIMENSION: int = 512  # largest width/height for model input
	    MIN_DIMENSION: int = 32  # smallest acceptable dimension

	    # --- Supported formats ---
	    ALLOWED_FORMATS: tuple = ("JPEG", "PNG", "WEBP", "JPG")
	    ALLOWED_EXTENSIONS: tuple = (".jpg", ".jpeg", ".png", ".webp")

	    # --- Processing ---
	    RESIZE_QUALITY: int = 95  # JPEG quality after resize
	    MAINTAIN_ASPECT_RATIO: bool = True


	# ============================================================================
	# GROQ API CONFIGURATION
	# ============================================================================

	@dataclass(frozen=True)
	class GroqConfig:
	    """
	    Immutable settings for the Groq API used for caption styling.

	    The API key default is read from the GROQ_API_KEY environment variable
	    once, when this class body is executed; it is an empty string when the
	    variable is not set.
	    """

	    # API Settings
	    # repr=False keeps the secret out of the auto-generated __repr__, so that
	    # printing or logging a GroqConfig instance cannot leak the key. The field
	    # is still a normal constructor argument and still takes part in equality.
	    API_KEY: str = field(default=os.getenv("GROQ_API_KEY", ""), repr=False)
	    MODEL_NAME: str = "llama-3.1-8b-instant"

	    # Request Parameters
	    MAX_TOKENS: int = 150
	    TEMPERATURE: float = 0.7
	    TOP_P: float = 0.9
	    TIMEOUT_SECONDS: int = 10

	    # Retry Logic
	    MAX_RETRIES: int = 3
	    RETRY_DELAY_SECONDS: float = 1.0

	    # Rate Limiting
	    REQUESTS_PER_MINUTE: int = 30


	# ============================================================================
	# STYLE CONFIGURATION
	# ============================================================================

	class StyleConfig:
	    """Caption styling options: one prompt and one fallback template per style."""

	    # Style name -> instruction text for rewriting a caption in that style.
	    STYLES: Final[Dict[str, str]] = {
	        "None":
	            "Keep the original caption without any modifications.",
	        "Professional":
	            "Rewrite this image caption in a professional, business-appropriate tone. Make it clear, formal, and suitable for corporate presentations or reports.",
	        "Creative":
	            "Transform this caption into a creative, artistic, and imaginative description. Use vivid language and engaging expressions.",
	        "Social Media":
	            "Rewrite this caption for social media platforms. Make it engaging, add relevant emojis, and make it shareable. Keep it under 280 characters.",
	        "Technical":
	            "Rewrite this caption with technical precision and detailed analysis. Focus on specific elements, composition, and visual characteristics.",
	    }

	    # Must be one of the keys of STYLES.
	    DEFAULT_STYLE: Final[str] = "Professional"

	    # Style name -> str.format template with a {caption} placeholder, for use
	    # when the API fails.
	    FALLBACK_TEMPLATES: Final[Dict[str, str]] = {
	        "Professional": "Image Description: {caption}",
	        "Creative": "✨ {caption} ✨",
	        "Social Media": "📸 {caption} #AI #ImageCaption",
	        "Technical": "Visual Analysis: {caption}",
	        "None": "{caption}",
	    }


	# ============================================================================
	# CACHE CONFIGURATION
	# ============================================================================

	@dataclass(frozen=True)
	class CacheConfig:
	    """Immutable settings for the caching system."""

	    # --- Capacity and expiry ---
	    MAX_CACHE_SIZE: int = 100  # upper bound on the number of cached items
	    CACHE_TTL_SECONDS: int = 60 * 60  # time to live: one hour

	    # --- Cache keys ---
	    ENABLE_CAPTION_CACHE: bool = True
	    CACHE_KEY_ALGO: str = "md5"  # name of the hashing algorithm for cache keys


	# ============================================================================
	# ANALYTICS CONFIGURATION
	# ============================================================================

	@dataclass(frozen=True)
	class AnalyticsConfig:
	    """Immutable settings for usage analytics."""

	    # --- Storage ---
	    # The default is taken from the module-level ANALYTICS_FILE path.
	    ANALYTICS_FILE: Path = ANALYTICS_FILE
	    # Auto-save interval in seconds; the saving itself is not done in this module.
	    SAVE_INTERVAL_SECONDS: int = 30

	    # --- Metrics to track (one on/off switch per metric) ---
	    TRACK_PROCESSING_TIME: bool = True
	    TRACK_STYLE_USAGE: bool = True
	    TRACK_MODEL_USAGE: bool = True
	    TRACK_ERROR_RATE: bool = True


	# ============================================================================
	# GRADIO UI CONFIGURATION
	# ============================================================================

	@dataclass(frozen=True)
	class UIConfig:
	    """Immutable settings for the Gradio interface."""

	    # --- App metadata ---
	    TITLE: str = "🖼️ AI Image Caption Generator"
	    # The text lines of this literal are deliberately left exactly as they are:
	    # any leading whitespace inside the triple quotes is part of the string.
	    DESCRIPTION: str = """
	Generate professional image captions using state-of-the-art AI models.
	Upload an image and choose your preferred style - get instant captions from both BLIP and GIT models.
	"""

	    # --- UI settings ---
	    THEME: str = "soft"  # Gradio theme name
	    SHOW_API: bool = False
	    SHOW_ERROR: bool = True

	    # --- Component settings ---
	    IMAGE_HEIGHT: int = 400
	    MAX_QUEUE_SIZE: int = 10

	    # --- Example images ---
	    # Built from the module-level STATIC_DIR path.
	    EXAMPLES_DIR: Path = STATIC_DIR.joinpath("images", "examples")


	# ============================================================================
	# LOGGING CONFIGURATION
	# ============================================================================

	@dataclass(frozen=True)
	class LogConfig:
	    """Immutable logging settings: level, record format and timestamp format."""

	    # The default is read from the LOG_LEVEL environment variable once, when
	    # this class body runs; "INFO" is used when the variable is not set.
	    LOG_LEVEL: str = os.environ.get("LOG_LEVEL", "INFO")
	    LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
	    LOG_DATE_FORMAT: str = "%Y-%m-%d %H:%M:%S"


	# ============================================================================
	# PERFORMANCE CONFIGURATION
	# ============================================================================

	@dataclass(frozen=True)
	class PerformanceConfig:
	    """Immutable settings for performance tuning."""

	    # --- Processing timeouts ---
	    MAX_PROCESSING_TIME_SECONDS: int = 30

	    # --- Model loading ---
	    # True: load models on first use; False: load them at startup.
	    LAZY_LOAD_MODELS: bool = False

	    # --- Batch processing (future feature) ---
	    ENABLE_BATCH_PROCESSING: bool = False
	    MAX_BATCH_SIZE: int = 1


	# ============================================================================
	# INSTANTIATE CONFIGURATIONS
	# ============================================================================

	# Create singleton instances
	# Each object is built once, at import time, from its class defaults. They are
	# singletons by convention only: nothing here prevents further instances.
	# All classes except StyleConfig are frozen dataclasses; StyleConfig is a plain
	# class whose settings are class attributes.
	model_config = ModelConfig()
	image_config = ImageConfig()
	groq_config = GroqConfig()
	style_config = StyleConfig()
	cache_config = CacheConfig()
	analytics_config = AnalyticsConfig()
	ui_config = UIConfig()
	log_config = LogConfig()
	performance_config = PerformanceConfig()


	# ============================================================================
	# VALIDATION
	# ============================================================================

	def validate_config() -> tuple[bool, list[str]]:
	    """
	    Validate the module-level configuration objects.

	    Checks, in this order:
	      * groq_config.API_KEY is non-empty
	      * CACHE_DIR and MODEL_CACHE_DIR exist
	      * image_config.MAX_DIMENSION is not smaller than MIN_DIMENSION
	      * style_config.STYLES is non-empty
	      * style_config.DEFAULT_STYLE is a key of style_config.STYLES

	    All checks always run; the function never raises for a failed check.

	    Returns:
	        tuple: (is_valid, list_of_errors) -- is_valid is True exactly when
	        list_of_errors is empty; each error is a human-readable message.
	    """
	    errors: list[str] = []

	    # Check Groq API Key
	    if not groq_config.API_KEY:
	        errors.append("GROQ_API_KEY not found in environment variables")

	    # Check required directories
	    for directory in (CACHE_DIR, MODEL_CACHE_DIR):
	        if not directory.exists():
	            errors.append(f"Required directory not found: {directory}")

	    # Validate image constraints. Equal values are accepted by this check, so
	    # the message says "greater than or equal to" to match the condition.
	    if image_config.MAX_DIMENSION < image_config.MIN_DIMENSION:
	        errors.append("MAX_DIMENSION must be greater than or equal to MIN_DIMENSION")

	    # Validate style options
	    if not style_config.STYLES:
	        errors.append("No style options configured")

	    if style_config.DEFAULT_STYLE not in style_config.STYLES:
	        errors.append(f"Default style '{style_config.DEFAULT_STYLE}' not in available styles")

	    return not errors, errors


	# ============================================================================
	# CONFIGURATION SUMMARY
	# ============================================================================

	def print_config_summary() -> None:
	    """Print the main settings to stdout, followed by the validation result."""
	    rule = "=" * 60

	    # Header and paths
	    print(rule)
	    print("AI IMAGE CAPTION GENERATOR - CONFIGURATION SUMMARY")
	    print(rule)
	    print(f"Project Root: {PROJECT_ROOT}")
	    print(f"Cache Directory: {CACHE_DIR}")
	    print(f"Model Cache: {MODEL_CACHE_DIR}")

	    # Captioning models
	    print("\nModels:")
	    print(f" - BLIP: {model_config.BLIP_MODEL_NAME}")
	    print(f" - GIT: {model_config.GIT_MODEL_NAME}")
	    print(f" - Device: {model_config.DEVICE}")

	    # Groq API: only whether a key is present is shown, never the key itself
	    print("\nGroq API:")
	    print(f" - Model: {groq_config.MODEL_NAME}")
	    print(f" - API Key: {'✓ Configured' if groq_config.API_KEY else '✗ Missing'}")

	    # Image limits
	    print("\nImage Processing:")
	    print(f" - Max Size: {image_config.MAX_FILE_SIZE_MB}MB")
	    print(f" - Max Dimension: {image_config.MAX_DIMENSION}px")
	    print(f" - Formats: {', '.join(image_config.ALLOWED_FORMATS)}")

	    # Styles, cache and analytics
	    print(f"\nStyle Options: {len(style_config.STYLES)}")
	    for style_name in style_config.STYLES:
	        print(f" - {style_name}")
	    print(f"\nCache: {cache_config.MAX_CACHE_SIZE} items")
	    print(f"Analytics: {analytics_config.ANALYTICS_FILE}")
	    print(rule)

	    # Validation outcome; both outcomes end with the same closing rule
	    is_valid, errors = validate_config()
	    if is_valid:
	        print("\n✓ Configuration validated successfully")
	    else:
	        print("\n⚠️ CONFIGURATION ERRORS:")
	        for error in errors:
	            print(f" - {error}")
	    print(rule)


	if __name__ == "__main__":
	    # Executed as a script (not imported): print the summary, which also runs
	    # the configuration validation.
	    print_config_summary()