Spaces:

AK51
/

SolarImageDownloader

Sleeping

App Files Files Community

SolarImageDownloader / src /storage /validation_service.py

AK51

Upload 13308 files

b610d23 verified about 2 months ago

raw

history blame contribute delete

9.64 kB

	"""File integrity and validation service for NASA solar images."""

	import hashlib
	import logging
	from pathlib import Path
	from typing import Optional, Tuple
	from PIL import Image
	import io

	from ..models import ImageMetadata


	class ValidationService:
	"""Validates downloaded images for integrity and format."""

	def __init__(self):
	"""Initialize validation service."""
	self.logger = logging.getLogger(__name__)

	def validate_image_format(self, file_path: Path) -> Tuple[bool, Optional[str]]:
	"""
	Validate that a file is a valid JPEG image.

	Args:
	file_path: Path to the image file

	Returns:
	Tuple of (is_valid, error_message)
	"""
	try:
	with Image.open(file_path) as img:
	# Check if it's a JPEG
	if img.format != 'JPEG':
	return False, f"Expected JPEG format, got {img.format}"

	# Verify image can be loaded completely
	img.verify()

	self.logger.debug(f"Image format validation passed: {file_path}")
	return True, None

	except Exception as e:
	error_msg = f"Image format validation failed: {str(e)}"
	self.logger.warning(f"{error_msg} for {file_path}")
	return False, error_msg

	def calculate_file_hash(self, file_path: Path, algorithm: str = 'md5') -> Optional[str]:
	"""
	Calculate hash of a file for integrity checking.

	Args:
	file_path: Path to the file
	algorithm: Hash algorithm ('md5', 'sha256', etc.)

	Returns:
	Hex digest of the hash, or None if error
	"""
	try:
	hash_obj = hashlib.new(algorithm)

	with open(file_path, 'rb') as f:
	# Read in chunks to handle large files
	for chunk in iter(lambda: f.read(8192), b""):
	hash_obj.update(chunk)

	return hash_obj.hexdigest()

	except Exception as e:
	self.logger.error(f"Error calculating hash for {file_path}: {e}")
	return None

	def validate_file_size(self, file_path: Path, expected_size: int, tolerance: int = 0) -> Tuple[bool, Optional[str]]:
	"""
	Validate file size matches expected size.

	Args:
	file_path: Path to the file
	expected_size: Expected file size in bytes
	tolerance: Allowed difference in bytes

	Returns:
	Tuple of (is_valid, error_message)
	"""
	try:
	actual_size = file_path.stat().st_size
	size_diff = abs(actual_size - expected_size)

	if size_diff <= tolerance:
	self.logger.debug(f"File size validation passed: {file_path} ({actual_size} bytes)")
	return True, None
	else:
	error_msg = f"Size mismatch: expected {expected_size}, got {actual_size} (diff: {size_diff})"
	self.logger.warning(f"File size validation failed: {error_msg} for {file_path}")
	return False, error_msg

	except Exception as e:
	error_msg = f"Error checking file size: {str(e)}"
	self.logger.error(f"{error_msg} for {file_path}")
	return False, error_msg

	def validate_image_content(self, file_path: Path) -> Tuple[bool, Optional[str]]:
	"""
	Validate image content and extract basic properties.

	Args:
	file_path: Path to the image file

	Returns:
	Tuple of (is_valid, error_message)
	"""
	try:
	with Image.open(file_path) as img:
	# Check image dimensions (NASA SDO images should be reasonable size)
	width, height = img.size

	if width < 100 or height < 100:
	return False, f"Image too small: {width}x{height}"

	if width > 10000 or height > 10000:
	return False, f"Image too large: {width}x{height}"

	# Check if image has reasonable color depth
	if img.mode not in ['RGB', 'L', 'RGBA']:
	return False, f"Unexpected color mode: {img.mode}"

	self.logger.debug(f"Image content validation passed: {file_path} ({width}x{height}, {img.mode})")
	return True, None

	except Exception as e:
	error_msg = f"Image content validation failed: {str(e)}"
	self.logger.warning(f"{error_msg} for {file_path}")
	return False, error_msg

	def comprehensive_validation(self, file_path: Path, expected_size: Optional[int] = None) -> Tuple[bool, list]:
	"""
	Perform comprehensive validation of an image file.

	Args:
	file_path: Path to the image file
	expected_size: Expected file size in bytes (optional)

	Returns:
	Tuple of (all_valid, list_of_errors)
	"""
	errors = []

	# Check if file exists
	if not file_path.exists():
	errors.append(f"File does not exist: {file_path}")
	return False, errors

	# Check if file is not empty
	if file_path.stat().st_size == 0:
	errors.append(f"File is empty: {file_path}")
	return False, errors

	# Validate file size if expected size provided
	if expected_size is not None:
	size_valid, size_error = self.validate_file_size(file_path, expected_size)
	if not size_valid:
	errors.append(size_error)

	# Validate image format
	format_valid, format_error = self.validate_image_format(file_path)
	if not format_valid:
	errors.append(format_error)

	# Validate image content
	content_valid, content_error = self.validate_image_content(file_path)
	if not content_valid:
	errors.append(content_error)

	all_valid = len(errors) == 0

	if all_valid:
	self.logger.info(f"Comprehensive validation passed: {file_path}")
	else:
	self.logger.warning(f"Comprehensive validation failed for {file_path}: {errors}")

	return all_valid, errors

	def repair_corrupted_image(self, file_path: Path) -> bool:
	"""
	Attempt to repair a corrupted image file.

	Args:
	file_path: Path to the corrupted image

	Returns:
	True if repair was successful, False otherwise
	"""
	try:
	# Try to open and re-save the image
	with Image.open(file_path) as img:
	# Create a backup
	backup_path = file_path.with_suffix('.backup')
	file_path.rename(backup_path)

	# Save the image again (this can fix minor corruption)
	img.save(file_path, 'JPEG', quality=95)

	# Validate the repaired image
	is_valid, _ = self.validate_image_format(file_path)

	if is_valid:
	# Remove backup if repair successful
	backup_path.unlink()
	self.logger.info(f"Successfully repaired image: {file_path}")
	return True
	else:
	# Restore backup if repair failed
	file_path.unlink()
	backup_path.rename(file_path)
	self.logger.warning(f"Failed to repair image: {file_path}")
	return False

	except Exception as e:
	self.logger.error(f"Error attempting to repair {file_path}: {e}")
	return False

	def get_image_info(self, file_path: Path) -> Optional[dict]:
	"""
	Extract detailed information about an image file.

	Args:
	file_path: Path to the image file

	Returns:
	Dictionary with image information, or None if error
	"""
	try:
	with Image.open(file_path) as img:
	info = {
	'filename': file_path.name,
	'format': img.format,
	'mode': img.mode,
	'size': img.size,
	'width': img.size[0],
	'height': img.size[1],
	'file_size': file_path.stat().st_size,
	'has_transparency': img.mode in ['RGBA', 'LA'] or 'transparency' in img.info
	}

	# Add EXIF data if available
	if hasattr(img, '_getexif') and img._getexif():
	info['has_exif'] = True
	else:
	info['has_exif'] = False

	return info

	except Exception as e:
	self.logger.error(f"Error getting image info for {file_path}: {e}")
	return None