# MemPrepMate - src/services/s3_config.py
# Author: Christian Kniep
# update to v2 (commit 5d3ee93)
"""
S3 Configuration Module
Manages S3 backup/restore configuration loaded from environment variables.
Provides validation and credential checking for S3-compatible storage
(AWS S3, MinIO, DigitalOcean Spaces, etc.).
"""
import os
import logging
from dataclasses import dataclass
from typing import Optional
import boto3
from botocore.exceptions import ClientError
# Module-level logger named after this module (standard logging convention).
logger = logging.getLogger(__name__)
# Custom Exceptions
class S3BackupError(Exception):
    """Root of the S3 backup exception hierarchy; catch this to handle any backup failure."""
class S3CredentialsError(S3BackupError):
    """Raised when the configured S3 credentials are rejected or unusable."""
class S3BucketNotFoundError(S3BackupError):
    """Raised when the configured S3 bucket cannot be found."""
class DatabaseCorruptedError(S3BackupError):
    """Raised when the SQLite database fails its integrity check."""
class S3ConnectionError(S3BackupError):
    """Raised on a network-level failure while talking to S3."""
class RestoreError(S3BackupError):
    """Raised on a restore failure severe enough to abort startup."""
@dataclass
class S3Config:
    """
    S3 configuration for backup/restore operations.

    Attributes:
        enabled: Whether S3 backup/restore is enabled
        bucket: S3 bucket name
        access_key: AWS access key ID
        secret_key: AWS secret access key
        region: AWS region (default: us-east-1)
        endpoint_url: Custom S3 endpoint for MinIO/DigitalOcean Spaces
        upload_timeout: Upload timeout in seconds
        download_timeout: Download timeout in seconds
        debounce_seconds: Debounce period for backup requests
    """
    enabled: bool
    bucket: Optional[str] = None
    access_key: Optional[str] = None
    secret_key: Optional[str] = None
    region: str = "us-east-1"
    endpoint_url: Optional[str] = None
    upload_timeout: int = 60
    download_timeout: int = 30
    debounce_seconds: int = 300

    @staticmethod
    def _int_from_env(name: str, default: int) -> int:
        """
        Read environment variable *name* as an int.

        Falls back to *default* (with a warning) when the variable is unset
        or not a valid integer. Parsing each variable independently means one
        malformed value no longer discards the other integer settings.
        """
        raw = os.getenv(name)
        if raw is None:
            return default
        try:
            return int(raw)
        except ValueError:
            logger.warning(
                "Invalid integer %r for %s, using default %d", raw, name, default
            )
            return default

    @staticmethod
    def from_env() -> 'S3Config':
        """
        Factory method to create S3Config from environment variables.

        Environment Variables:
            S3_BACKUP_ENABLED: "true" or "false" (default: "false")
            S3_BUCKET_NAME: S3 bucket name (required if enabled)
            S3_ACCESS_KEY: AWS access key ID (required if enabled)
            S3_SECRET_KEY: AWS secret access key (required if enabled)
            S3_REGION: AWS region (default: "us-east-1")
            S3_ENDPOINT_URL: Custom S3 endpoint (optional)
            S3_UPLOAD_TIMEOUT: Upload timeout in seconds (default: 60)
            S3_DOWNLOAD_TIMEOUT: Download timeout in seconds (default: 30)
            S3_DEBOUNCE_SECONDS: Debounce period in seconds (default: 300)

        Returns:
            S3Config instance with enabled=False if configuration is incomplete
        """
        enabled = os.getenv('S3_BACKUP_ENABLED', 'false').lower() == 'true'
        if not enabled:
            # Message no longer claims the variable is "not set" -- it may be
            # explicitly set to "false".
            logger.info("S3 backup/restore disabled (S3_BACKUP_ENABLED is not 'true')")
            return S3Config(enabled=False)

        # Required settings. If any is missing we disable backups rather than
        # fail, so the service can still start without them.
        bucket = os.getenv('S3_BUCKET_NAME')
        access_key = os.getenv('S3_ACCESS_KEY')
        secret_key = os.getenv('S3_SECRET_KEY')

        required = {
            'S3_BUCKET_NAME': bucket,
            'S3_ACCESS_KEY': access_key,
            'S3_SECRET_KEY': secret_key,
        }
        missing = [name for name, value in required.items() if not value]
        if missing:
            logger.warning(
                f"S3 backup disabled - missing required configuration: {', '.join(missing)}"
            )
            return S3Config(enabled=False)

        config = S3Config(
            enabled=True,
            bucket=bucket,
            access_key=access_key,
            secret_key=secret_key,
            region=os.getenv('S3_REGION', 'us-east-1'),
            endpoint_url=os.getenv('S3_ENDPOINT_URL'),  # None for AWS S3
            upload_timeout=S3Config._int_from_env('S3_UPLOAD_TIMEOUT', 60),
            download_timeout=S3Config._int_from_env('S3_DOWNLOAD_TIMEOUT', 30),
            debounce_seconds=S3Config._int_from_env('S3_DEBOUNCE_SECONDS', 300),
        )
        logger.info(
            f"S3 backup enabled - bucket: {config.bucket}, region: {config.region}, "
            f"endpoint: {config.endpoint_url or 'AWS S3'}"
        )
        return config

    def validate_credentials(self) -> bool:
        """
        Test S3 credentials by performing a HeadBucket operation.

        Returns:
            True if credentials are valid and bucket is accessible
            False if credentials are invalid or bucket not found

        Raises:
            S3ConnectionError: Network or S3 service error
        """
        if not self.enabled:
            return False
        try:
            s3_client = boto3.client(
                's3',
                endpoint_url=self.endpoint_url,
                aws_access_key_id=self.access_key,
                aws_secret_access_key=self.secret_key,
                region_name=self.region
            )
            # HeadBucket validates both credentials and bucket existence.
            s3_client.head_bucket(Bucket=self.bucket)
            logger.info(f"S3 credentials validated - bucket '{self.bucket}' is accessible")
            return True
        except ClientError as e:
            # Some S3-compatible services (e.g. MinIO) report named codes
            # instead of bare HTTP status strings; accept both forms so a
            # missing bucket / bad credentials is reported as False rather
            # than escalated to S3ConnectionError.
            error_code = e.response['Error']['Code']
            if error_code in ('404', 'NoSuchBucket'):
                logger.error(f"S3 bucket not found: {self.bucket}")
                return False
            elif error_code in ('403', 'AccessDenied'):
                logger.error("S3 credentials invalid or insufficient permissions")
                return False
            else:
                logger.error(f"S3 error during credential validation: {error_code}")
                raise S3ConnectionError(f"S3 error: {error_code}") from e
        except Exception as e:
            # Network failures (DNS, timeout, ...) surface here; wrap them in
            # the module's own exception type for callers.
            logger.error(f"Unexpected error during S3 credential validation: {e}")
            raise S3ConnectionError(f"S3 connection error: {e}") from e

    def create_s3_client(self):
        """
        Create a boto3 S3 client with this configuration.

        Returns:
            boto3.client instance configured for S3

        Raises:
            S3CredentialsError: If S3 backup is not enabled
        """
        if not self.enabled:
            raise S3CredentialsError("S3 backup is not enabled")
        from botocore.config import Config
        boto_config = Config(
            connect_timeout=5,
            # NOTE(review): read_timeout uses download_timeout for all
            # operations; uploads presumably rely on the caller's own
            # timeout handling -- confirm against the backup service.
            read_timeout=self.download_timeout,
            retries={'max_attempts': 0}  # We handle retries manually
        )
        return boto3.client(
            's3',
            endpoint_url=self.endpoint_url,
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=self.region,
            config=boto_config
        )