""" Data Validation and Constraints Validate data before insertion/update """ import re from typing import Dict, List, Any class DataValidator: """Validate data against rules and constraints""" def validate_row(self, data: Dict, schema: List[Dict]) -> Dict: """Validate a row against schema""" errors = [] for column in schema: col_name = column['name'] col_type = column.get('type', 'TEXT') nullable = column.get('nullable', True) unique = column.get('unique', False) value = data.get(col_name) # Check nullable if not nullable and value is None: errors.append(f"{col_name}: Cannot be null") continue if value is not None: # Type validation type_error = self._validate_type(value, col_type) if type_error: errors.append(f"{col_name}: {type_error}") # Custom validators if 'validator' in column: validator_error = self._run_validator(value, column['validator']) if validator_error: errors.append(f"{col_name}: {validator_error}") if errors: return {'ok': False, 'errors': errors} return {'ok': True} def validate_email(self, email: str) -> bool: """Validate email format""" pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' return bool(re.match(pattern, email)) def validate_phone(self, phone: str) -> bool: """Validate phone number""" # Remove common separators cleaned = re.sub(r'[\s\-\(\)]', '', phone) # Check if it's digits and reasonable length return cleaned.isdigit() and 10 <= len(cleaned) <= 15 def validate_url(self, url: str) -> bool: """Validate URL format""" pattern = r'^https?://[^\s/$.?#].[^\s]*$' return bool(re.match(pattern, url)) def validate_credit_card(self, card: str) -> bool: """Validate credit card using Luhn algorithm""" # Remove spaces and dashes card = re.sub(r'[\s\-]', '', card) if not card.isdigit() or len(card) < 13 or len(card) > 19: return False # Luhn algorithm total = 0 reverse_digits = card[::-1] for i, digit in enumerate(reverse_digits): n = int(digit) if i % 2 == 1: n *= 2 if n > 9: n -= 9 total += n return total % 10 == 0 def validate_range(self, value: Any, min_val: Any = None, max_val: Any = None) -> bool: """Validate value is within range""" if min_val is not None and value < min_val: return False if max_val is not None and value > max_val: return False return True def validate_length(self, value: str, min_len: int = None, max_len: int = None) -> bool: """Validate string length""" length = len(value) if min_len is not None and length < min_len: return False if max_len is not None and length > max_len: return False return True def validate_pattern(self, value: str, pattern: str) -> bool: """Validate against regex pattern""" return bool(re.match(pattern, value)) def _validate_type(self, value: Any, expected_type: str) -> str: """Validate value type""" expected_type = expected_type.upper() if expected_type in ['INTEGER', 'INT']: if not isinstance(value, int): return f"Expected integer, got {type(value).__name__}" elif expected_type in ['DECIMAL', 'FLOAT', 'DOUBLE']: if not isinstance(value, (int, float)): return f"Expected number, got {type(value).__name__}" elif expected_type == 'BOOLEAN': if not isinstance(value, bool): return f"Expected boolean, got {type(value).__name__}" elif expected_type in ['VARCHAR', 'TEXT', 'STRING']: if not isinstance(value, str): return f"Expected string, got {type(value).__name__}" return None def _run_validator(self, value: Any, validator: Dict) -> str: """Run custom validator""" validator_type = validator.get('type') if validator_type == 'email': if not self.validate_email(value): return "Invalid email format" elif validator_type == 'phone': if not self.validate_phone(value): return "Invalid phone number" elif validator_type == 'url': if not self.validate_url(value): return "Invalid URL format" elif validator_type == 'range': min_val = validator.get('min') max_val = validator.get('max') if not self.validate_range(value, min_val, max_val): return f"Value must be between {min_val} and {max_val}" elif validator_type == 'length': min_len = validator.get('min') max_len = validator.get('max') if not self.validate_length(value, min_len, max_len): return f"Length must be between {min_len} and {max_len}" elif validator_type == 'pattern': pattern = validator.get('pattern') if not self.validate_pattern(value, pattern): return f"Value does not match required pattern" return None data_validator = DataValidator()