File size: 3,970 Bytes
652aca5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""Data preprocessing utilities"""
import pandas as pd
import numpy as np
from datetime import datetime
import logging

logger = logging.getLogger(__name__)


def validate_lab_data(data: dict) -> tuple:
    """
    Validate laboratory data

    Checks that every required field is present and that each known
    measurement supplied in ``data`` is a number inside its accepted range.
    Keys that are not listed in the range table are ignored. Validation
    stops at the first problem found.

    Args:
        data: Mapping of field name to submitted value.

    Returns:
        (is_valid, error_message) -- ``error_message`` is an empty string
        when the data is valid, otherwise it describes the first failure.
    """
    required_fields = ['test_date']

    for field in required_fields:
        if field not in data:
            return False, f"Missing required field: {field}"

    # Inclusive (minimum, maximum) bounds accepted for each measurement
    validations = {
        'glucose': (0, 500),
        'hba1c': (0, 20),
        'cholesterol': (0, 500),
        'hdl': (0, 200),
        'ldl': (0, 400),
        'triglycerides': (0, 1000),
        'blood_pressure_systolic': (50, 250),
        'blood_pressure_diastolic': (30, 150),
        'heart_rate': (30, 200)
    }

    for field, (min_val, max_val) in validations.items():
        if field not in data:
            continue

        value = data[field]
        # bool is a subclass of int, so it has to be excluded explicitly;
        # otherwise True/False would be accepted as the readings 1/0.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return False, f"{field} must be a number"
        # NaN fails both comparisons and is therefore reported as out of range.
        if not (min_val <= value <= max_val):
            return False, f"{field} must be between {min_val} and {max_val}"

    return True, ""


def validate_lifestyle_data(data: dict) -> tuple:
    """
    Validate lifestyle data

    Checks that every required field is present, that each supplied numeric
    field is a number inside its accepted range, and that each supplied
    categorical field holds one of its allowed values. Optional fields that
    are absent are not checked. Validation stops at the first problem found.

    Args:
        data: Mapping of field name to submitted value.

    Returns:
        (is_valid, error_message) -- ``error_message`` is an empty string
        when the data is valid, otherwise it describes the first failure.
    """
    required_fields = ['date']

    for field in required_fields:
        if field not in data:
            return False, f"Missing required field: {field}"

    # Inclusive (minimum, maximum) bounds accepted for each numeric field
    numeric_ranges = {
        'sleep_hours': (0, 24),
        'exercise_minutes': (0, 1440),
        'steps': (0, 100000),
    }

    for field, (min_val, max_val) in numeric_ranges.items():
        if field not in data:
            continue

        value = data[field]
        # Without this guard a string or None would raise TypeError in the
        # range comparison instead of producing a validation error. bool is
        # excluded explicitly because it is a subclass of int.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return False, f"{field} must be a number"
        if not (min_val <= value <= max_val):
            return False, f"{field} must be between {min_val} and {max_val}"

    # Allowed values for each categorical field (kept as lists so the error
    # message renders them in list form)
    allowed_values = {
        'sleep_quality': ['poor', 'fair', 'good', 'excellent'],
        'diet_quality': ['poor', 'fair', 'balanced', 'excellent'],
    }

    for field, valid_values in allowed_values.items():
        if field in data and data[field] not in valid_values:
            return False, f"{field} must be one of: {valid_values}"

    return True, ""


def validate_mental_health_data(data: dict) -> tuple:
    """
    Validate mental health data

    Checks that every required field is present, that each supplied scale
    value is a number from 1 to 10 inclusive, and that ``mood``, when
    supplied, is one of the allowed values. Optional fields that are absent
    are not checked. Validation stops at the first problem found.

    Args:
        data: Mapping of field name to submitted value.

    Returns:
        (is_valid, error_message) -- ``error_message`` is an empty string
        when the data is valid, otherwise it describes the first failure.
    """
    required_fields = ['date']

    for field in required_fields:
        if field not in data:
            return False, f"Missing required field: {field}"

    # Validate scale values
    for field in ('stress_level', 'anxiety_level'):
        if field not in data:
            continue

        value = data[field]
        # Without this guard a string or None would raise TypeError in the
        # range comparison instead of producing a validation error. bool is
        # excluded explicitly because it is a subclass of int.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return False, f"{field} must be a number"
        if not (1 <= value <= 10):
            return False, f"{field} must be between 1 and 10"

    # Validate categorical fields
    if 'mood' in data:
        valid_values = ['depressed', 'low', 'neutral', 'good', 'excellent']
        if data['mood'] not in valid_values:
            return False, f"mood must be one of: {valid_values}"

    return True, ""


def sanitize_input(data: dict) -> dict:
    """
    Build a cleaned copy of ``data``

    Entries whose value is ``None`` are dropped. String values have their
    surrounding whitespace stripped and their angle brackets replaced by
    the ``&lt;`` / ``&gt;`` entities. All other values are carried over
    unchanged. Only top-level values are processed, keys are left as they
    are, and no other characters (such as ``&`` or quotes) are escaped.

    Args:
        data: Mapping of field name to submitted value.

    Returns:
        A new dict; the input mapping is not modified.
    """
    # Single-pass replacement table for the two angle brackets
    bracket_entities = str.maketrans({'<': '&lt;', '>': '&gt;'})

    def _clean(item):
        # Non-string values pass through untouched
        if not isinstance(item, str):
            return item
        return item.strip().translate(bracket_entities)

    return {
        name: _clean(item)
        for name, item in data.items()
        if item is not None
    }