Spaces:
Runtime error
Runtime error
| """Data preprocessing utilities""" | |
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime | |
| import logging | |
| logger = logging.getLogger(__name__) | |
def validate_lab_data(data: dict) -> tuple:
    """
    Validate laboratory data

    Checks that every required field is present and that each known
    numeric measurement, when supplied, is a number inside its accepted
    inclusive range. Fields not listed in the range table are ignored.
    Validation stops at the first problem found.

    Args:
        data: Mapping of lab field names to submitted values.

    Returns:
        (is_valid, error_message) -- error_message is "" when valid.
    """
    required_fields = ['test_date']
    for field in required_fields:
        if field not in data:
            return False, f"Missing required field: {field}"

    # Validate numeric ranges: field name -> (inclusive min, inclusive max)
    validations = {
        'glucose': (0, 500),
        'hba1c': (0, 20),
        'cholesterol': (0, 500),
        'hdl': (0, 200),
        'ldl': (0, 400),
        'triglycerides': (0, 1000),
        'blood_pressure_systolic': (50, 250),
        'blood_pressure_diastolic': (30, 150),
        'heart_rate': (30, 200),
    }
    for field, (min_val, max_val) in validations.items():
        if field not in data:
            continue
        value = data[field]
        # bool is a subclass of int, so it must be rejected explicitly;
        # otherwise True/False would be accepted as the readings 1/0.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return False, f"{field} must be a number"
        # NaN fails both comparisons and is therefore reported as out of range.
        if not (min_val <= value <= max_val):
            return False, f"{field} must be between {min_val} and {max_val}"
    return True, ""
def validate_lifestyle_data(data: dict) -> tuple:
    """
    Validate lifestyle data

    Checks that every required field is present, that each supplied
    numeric field is a number inside its inclusive range, and that each
    supplied categorical field holds one of its allowed values.
    Validation stops at the first problem found.

    Args:
        data: Mapping of lifestyle field names to submitted values.

    Returns:
        (is_valid, error_message) -- error_message is "" when valid.
    """
    required_fields = ['date']
    for field in required_fields:
        if field not in data:
            return False, f"Missing required field: {field}"

    # Validate numeric ranges: field name -> (inclusive min, inclusive max)
    numeric_ranges = {
        'sleep_hours': (0, 24),
        'exercise_minutes': (0, 1440),
        'steps': (0, 100000),
    }
    for field, (min_val, max_val) in numeric_ranges.items():
        if field not in data:
            continue
        value = data[field]
        # Guard the comparison: a str or None here would otherwise raise
        # TypeError instead of producing a validation failure. bool is
        # rejected too because it is a subclass of int.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return False, f"{field} must be a number"
        if not (min_val <= value <= max_val):
            return False, f"{field} must be between {min_val} and {max_val}"

    # Validate categorical fields. The allowed values stay as lists because
    # the list repr is part of the error message.
    categorical_values = {
        'sleep_quality': ['poor', 'fair', 'good', 'excellent'],
        'diet_quality': ['poor', 'fair', 'balanced', 'excellent'],
    }
    for field, valid_values in categorical_values.items():
        if field in data and data[field] not in valid_values:
            return False, f"{field} must be one of: {valid_values}"
    return True, ""
def validate_mental_health_data(data: dict) -> tuple:
    """
    Validate mental health data

    Checks that every required field is present, that each supplied
    scale field is a number from 1 to 10 inclusive, and that ``mood``,
    when supplied, is one of the allowed values. Validation stops at the
    first problem found.

    Args:
        data: Mapping of mental health field names to submitted values.

    Returns:
        (is_valid, error_message) -- error_message is "" when valid.
    """
    required_fields = ['date']
    for field in required_fields:
        if field not in data:
            return False, f"Missing required field: {field}"

    # Validate scale values (both use the same inclusive 1-10 scale)
    for field in ('stress_level', 'anxiety_level'):
        if field not in data:
            continue
        value = data[field]
        # Guard the comparison: a str or None here would otherwise raise
        # TypeError instead of producing a validation failure. bool is
        # rejected too because it is a subclass of int.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return False, f"{field} must be a number"
        if not (1 <= value <= 10):
            return False, f"{field} must be between 1 and 10"

    # Validate categorical fields. The allowed values stay as a list because
    # the list repr is part of the error message.
    if 'mood' in data:
        valid_values = ['depressed', 'low', 'neutral', 'good', 'excellent']
        if data['mood'] not in valid_values:
            return False, f"mood must be one of: {valid_values}"
    return True, ""
def sanitize_input(data: dict) -> dict:
    """Remove any potentially harmful data from input

    Builds a new dict from ``data`` in which:

    * keys whose value is ``None`` are dropped,
    * string values are stripped of surrounding whitespace and have their
      angle brackets replaced by HTML entities,
    * every other value is copied through unchanged (nested containers
      are not descended into).

    Args:
        data: Mapping of field names to raw submitted values.

    Returns:
        A new dict; ``data`` itself is not modified.
    """
    sanitized = {}
    for key, value in data.items():
        # Skip None values
        if value is None:
            continue
        # Convert strings and remove scripts
        if isinstance(value, str):
            value = value.strip()
            # Basic XSS prevention: escape angle brackets so markup such as
            # a <script> tag becomes inert text. Replacing each character
            # with itself (as before) left the input unescaped.
            value = value.replace('<', '&lt;').replace('>', '&gt;')
        sanitized[key] = value
    return sanitized