corpusdb / app /data_validator.py
mrsavage1's picture
Upload 52 files
723f9ab verified
"""
Data Validation and Constraints
Validate data before insertion/update
"""
import re
from typing import Any, Dict, List, Optional
class DataValidator:
    """Validate row data against column rules and constraints.

    The ``validate_*`` methods are standalone predicates returning ``bool``.
    ``validate_row`` combines nullability, type and custom-validator checks
    for a whole row and reports every problem found.

    All predicates return ``False`` (rather than raising) when given a value
    of the wrong Python type, so a type mismatch in a row is reported as a
    validation error instead of crashing validation.
    """

    # Compiled once at class creation; used with ``fullmatch`` so that the
    # trailing ``$`` cannot accept an input ending in a newline.
    _EMAIL_RE = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')
    _URL_RE = re.compile(r'^https?://[^\s/$.?#].[^\s]*$')
    # ASCII digits only: ``str.isdigit`` is also true for characters such as
    # superscript digits, which ``int()`` cannot convert.
    _PHONE_DIGITS_RE = re.compile(r'[0-9]{10,15}')
    _CARD_DIGITS_RE = re.compile(r'[0-9]{13,19}')

    def validate_row(self, data: Dict, schema: List[Dict]) -> Dict:
        """Validate a row against a schema.

        Args:
            data: Mapping of column name to value. Missing columns are
                treated as ``None``.
            schema: List of column definitions. Each must have a ``'name'``
                key and may have ``'type'`` (default ``'TEXT'``),
                ``'nullable'`` (default ``True``) and ``'validator'`` (a
                dict understood by ``_run_validator``).

        Returns:
            ``{'ok': True}`` when no problems were found, otherwise
            ``{'ok': False, 'errors': [...]}`` with one
            ``"<column>: <message>"`` string per problem.

        Note:
            A ``'unique'`` flag in a column definition is not checked here;
            this method only sees a single row.
        """
        errors = []
        for column in schema:
            col_name = column['name']
            col_type = column.get('type', 'TEXT')
            nullable = column.get('nullable', True)
            value = data.get(col_name)

            if value is None:
                if not nullable:
                    errors.append(f"{col_name}: Cannot be null")
                # Nothing further to check for an absent value.
                continue

            type_error = self._validate_type(value, col_type)
            if type_error:
                errors.append(f"{col_name}: {type_error}")

            if 'validator' in column:
                validator_error = self._run_validator(value, column['validator'])
                if validator_error:
                    errors.append(f"{col_name}: {validator_error}")

        if errors:
            return {'ok': False, 'errors': errors}
        return {'ok': True}

    def validate_email(self, email: str) -> bool:
        """Return True if ``email`` is a string matching the email pattern."""
        if not isinstance(email, str):
            return False
        return self._EMAIL_RE.fullmatch(email) is not None

    def validate_phone(self, phone: str) -> bool:
        """Return True if ``phone`` contains 10 to 15 ASCII digits.

        Whitespace, dashes and parentheses are removed before checking; any
        other non-digit character makes the number invalid.
        """
        if not isinstance(phone, str):
            return False
        cleaned = re.sub(r'[\s\-\(\)]', '', phone)
        return self._PHONE_DIGITS_RE.fullmatch(cleaned) is not None

    def validate_url(self, url: str) -> bool:
        """Return True if ``url`` is a string matching the http(s) URL pattern."""
        if not isinstance(url, str):
            return False
        return self._URL_RE.fullmatch(url) is not None

    def validate_credit_card(self, card: str) -> bool:
        """Return True if ``card`` is 13-19 ASCII digits passing the Luhn check.

        Whitespace and dashes are removed before checking.
        """
        if not isinstance(card, str):
            return False
        card = re.sub(r'[\s\-]', '', card)
        if self._CARD_DIGITS_RE.fullmatch(card) is None:
            return False

        # Luhn: starting from the rightmost digit, double every second digit
        # and subtract 9 from any doubled result above 9.
        total = 0
        for index, char in enumerate(reversed(card)):
            digit = int(char)
            if index % 2 == 1:
                digit *= 2
                if digit > 9:
                    digit -= 9
            total += digit
        return total % 10 == 0

    def validate_range(self, value: Any, min_val: Any = None, max_val: Any = None) -> bool:
        """Return True if ``min_val <= value <= max_val``.

        A bound of ``None`` is not enforced. A value that cannot be compared
        with a bound (comparison raises ``TypeError``) is out of range.
        """
        try:
            if min_val is not None and value < min_val:
                return False
            if max_val is not None and value > max_val:
                return False
        except TypeError:
            return False
        return True

    def validate_length(
        self,
        value: str,
        min_len: Optional[int] = None,
        max_len: Optional[int] = None,
    ) -> bool:
        """Return True if ``len(value)`` lies within the inclusive bounds.

        A bound of ``None`` is not enforced. A value without a length is
        invalid.
        """
        try:
            length = len(value)
        except TypeError:
            return False
        if min_len is not None and length < min_len:
            return False
        if max_len is not None and length > max_len:
            return False
        return True

    def validate_pattern(self, value: str, pattern: str) -> bool:
        """Return True if ``pattern`` matches at the start of ``value``.

        Uses ``re.match``, so the pattern is anchored at the beginning only;
        callers wanting a whole-string match must anchor the end themselves.
        Non-string values never match.
        """
        if not isinstance(value, str):
            return False
        return bool(re.match(pattern, value))

    def _validate_type(self, value: Any, expected_type: str) -> Optional[str]:
        """Check ``value`` against a column type name (case-insensitive).

        Returns:
            An error message on mismatch, or ``None`` when the value is
            acceptable or the type name is not one of the recognised ones.
        """
        expected_type = expected_type.upper()
        actual = type(value).__name__
        # bool is a subclass of int, so it must be excluded explicitly from
        # the numeric types.
        is_bool = isinstance(value, bool)

        if expected_type in ('INTEGER', 'INT'):
            if is_bool or not isinstance(value, int):
                return f"Expected integer, got {actual}"
        elif expected_type in ('DECIMAL', 'FLOAT', 'DOUBLE'):
            if is_bool or not isinstance(value, (int, float)):
                return f"Expected number, got {actual}"
        elif expected_type == 'BOOLEAN':
            if not is_bool:
                return f"Expected boolean, got {actual}"
        elif expected_type in ('VARCHAR', 'TEXT', 'STRING'):
            if not isinstance(value, str):
                return f"Expected string, got {actual}"
        return None

    def _run_validator(self, value: Any, validator: Dict) -> Optional[str]:
        """Run the custom validator described by ``validator`` on ``value``.

        ``validator['type']`` selects the check: ``'email'``, ``'phone'``,
        ``'url'``, ``'range'`` (keys ``'min'``/``'max'``), ``'length'`` (keys
        ``'min'``/``'max'``) or ``'pattern'`` (key ``'pattern'``). Any other
        type is ignored.

        Returns:
            An error message when the check fails, otherwise ``None``.
        """
        validator_type = validator.get('type')

        if validator_type == 'email':
            if not self.validate_email(value):
                return "Invalid email format"
        elif validator_type == 'phone':
            if not self.validate_phone(value):
                return "Invalid phone number"
        elif validator_type == 'url':
            if not self.validate_url(value):
                return "Invalid URL format"
        elif validator_type == 'range':
            min_val = validator.get('min')
            max_val = validator.get('max')
            if not self.validate_range(value, min_val, max_val):
                return f"Value must be between {min_val} and {max_val}"
        elif validator_type == 'length':
            min_len = validator.get('min')
            max_len = validator.get('max')
            if not self.validate_length(value, min_len, max_len):
                return f"Length must be between {min_len} and {max_len}"
        elif validator_type == 'pattern':
            pattern = validator.get('pattern')
            if not self.validate_pattern(value, pattern):
                return "Value does not match required pattern"
        return None
# Module-level DataValidator instance, created when this module is imported.
data_validator = DataValidator()