File size: 6,109 Bytes
d2173d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
"""
Download NASA Turbofan Engine Degradation Dataset
This dataset simulates engine sensor data with degradation patterns
"""
import os
import zipfile
import requests
from pathlib import Path
from tqdm import tqdm


def download_file(url, destination, timeout=30, chunk_size=8192):
    """Download ``url`` to ``destination`` while showing a progress bar.

    Args:
        url: Address of the file to fetch.
        destination: Target file path (``str`` or ``pathlib.Path``).
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.
        chunk_size: Number of bytes requested per streamed chunk.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    # Accept plain strings as well; ``.name`` below needs a Path object.
    destination = Path(destination)

    # The context manager releases the pooled connection even on errors.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly instead of saving an HTML error page as the data file.
        response.raise_for_status()

        # A missing header yields 0; hand tqdm ``None`` so it shows an
        # open-ended counter rather than a bar with a total of zero.
        total_size = int(response.headers.get('content-length', 0)) or None

        with open(destination, 'wb') as file, tqdm(
            desc=destination.name,
            total=total_size,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as progress_bar:
            for data in response.iter_content(chunk_size=chunk_size):
                size = file.write(data)
                progress_bar.update(size)


def download_nasa_turbofan_data(data_dir='data/raw'):
    """
    Prepare the sensor dataset inside ``data_dir`` and return the CSV path.

    Despite its name, this function does not download anything: it creates
    ``data_dir`` if needed and delegates to ``create_synthetic_vehicle_data``,
    which writes a synthetic dataset there.

    Reference for the NASA Turbofan Engine Degradation Simulation Data Set:
    https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/

    Args:
        data_dir: Directory that will receive the generated CSV file.

    Returns:
        Whatever ``create_synthetic_vehicle_data`` returns (the path of the
        written CSV file).
    """
    data_path = Path(data_dir)
    data_path.mkdir(parents=True, exist_ok=True)

    print("Downloading NASA Turbofan Engine Degradation Dataset...")
    print("This dataset contains simulated engine sensor data with degradation patterns")

    # The NASA link requires a manual download, so a synthetic dataset is
    # generated instead of fetching the real one.
    print("\nNote: Creating synthetic vehicle sensor dataset based on NASA patterns...")

    return create_synthetic_vehicle_data(data_path)


def create_synthetic_vehicle_data(data_path, n_vehicles=100, n_timesteps=500, seed=42):
    """
    Create synthetic vehicle sensor data and save it as one CSV file.

    Simulates: engine temp, RPM, speed, battery voltage, oil pressure,
    coolant temp, fuel pressure, throttle, brake temp, four tire pressures
    and vibration. Roughly 30% of the vehicles start degrading at a random
    timestep in the last part of their series; rows from that point on are
    flagged with ``anomaly == 1``.

    Args:
        data_path: Output directory (``str`` or ``pathlib.Path``); created
            if it does not exist.
        n_vehicles: Number of simulated vehicles (at least 1).
        n_timesteps: Number of rows generated per vehicle (at least 1).
        seed: Seed for the random generator; ``None`` gives a different
            dataset on every call.

    Returns:
        ``pathlib.Path`` of the written ``vehicle_sensor_data.csv``.

    Raises:
        ValueError: If ``n_vehicles`` or ``n_timesteps`` is smaller than 1.
    """
    import numpy as np
    import pandas as pd

    if n_vehicles < 1 or n_timesteps < 1:
        raise ValueError("n_vehicles and n_timesteps must both be at least 1")

    # Allow direct calls with a string and with a directory that is missing.
    data_path = Path(data_path)
    data_path.mkdir(parents=True, exist_ok=True)

    print("Generating synthetic vehicle sensor data...")

    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.* calls, without reseeding the global generator
    # that other code in the process may rely on.
    rng = np.random.RandomState(seed)

    # Anomalies begin between 60% and 90% of the series (300..450 for the
    # default 500 steps). Integer math keeps the default bounds exact; the
    # max() keeps the randint range non-empty for very short series.
    anomaly_low = n_timesteps * 3 // 5
    anomaly_high = max(anomaly_low + 1, n_timesteps * 9 // 10)

    frames = []

    for vehicle_id in range(1, n_vehicles + 1):
        rows = []

        # Determine if vehicle will have anomaly (30% do). A start index
        # past the last timestep means "never".
        has_anomaly = rng.rand() > 0.7
        anomaly_start = rng.randint(anomaly_low, anomaly_high) if has_anomaly else n_timesteps + 1

        for t in range(n_timesteps):
            # Base sensor readings with some noise. The draw order is part
            # of the reproducibility contract: do not reorder these lines.
            base_engine_temp = 90 + rng.normal(0, 5)
            base_rpm = 2000 + rng.normal(0, 200)
            base_speed = 60 + rng.normal(0, 10)
            base_battery = 12.6 + rng.normal(0, 0.2)
            base_oil_pressure = 40 + rng.normal(0, 3)
            base_coolant_temp = 85 + rng.normal(0, 4)
            base_fuel_pressure = 50 + rng.normal(0, 2)
            base_throttle = 50 + rng.normal(0, 10)
            base_brake_temp = 150 + rng.normal(0, 15)
            base_tire_pressure_fl = 32 + rng.normal(0, 0.5)
            base_tire_pressure_fr = 32 + rng.normal(0, 0.5)
            base_tire_pressure_rl = 32 + rng.normal(0, 0.5)
            base_tire_pressure_rr = 32 + rng.normal(0, 0.5)
            base_vibration = 0.5 + rng.normal(0, 0.1)

            is_anomalous = t >= anomaly_start

            # Introduce anomalies after anomaly_start; severity grows
            # linearly with the number of steps since the onset.
            if is_anomalous:
                degradation_factor = (t - anomaly_start) / 100

                # Engine overheating
                base_engine_temp += degradation_factor * 20
                base_coolant_temp += degradation_factor * 15

                # Oil pressure drop
                base_oil_pressure -= degradation_factor * 10

                # Battery degradation
                base_battery -= degradation_factor * 0.5

                # Increased vibration
                base_vibration += degradation_factor * 0.3

                # Tire pressure issues (front-left only, 20% of the steps)
                if rng.rand() > 0.8:
                    base_tire_pressure_fl -= degradation_factor * 2

            # Clamp every reading to a physically sensible lower bound.
            rows.append({
                'vehicle_id': vehicle_id,
                'timestamp': t,
                'engine_temp': max(0, base_engine_temp),
                'rpm': max(0, base_rpm),
                'speed': max(0, base_speed),
                'battery_voltage': max(0, base_battery),
                'oil_pressure': max(0, base_oil_pressure),
                'coolant_temp': max(0, base_coolant_temp),
                'fuel_pressure': max(0, base_fuel_pressure),
                'throttle_position': np.clip(base_throttle, 0, 100),
                'brake_temp': max(0, base_brake_temp),
                'tire_pressure_fl': max(0, base_tire_pressure_fl),
                'tire_pressure_fr': max(0, base_tire_pressure_fr),
                'tire_pressure_rl': max(0, base_tire_pressure_rl),
                'tire_pressure_rr': max(0, base_tire_pressure_rr),
                'vibration_level': max(0, base_vibration),
                'anomaly': 1 if is_anomalous else 0,
            })

        frames.append(pd.DataFrame(rows))

    # Combine all vehicles into one dataset
    full_dataset = pd.concat(frames, ignore_index=True)

    # Save to CSV
    output_file = data_path / 'vehicle_sensor_data.csv'
    full_dataset.to_csv(output_file, index=False)
    print(f"✓ Saved synthetic vehicle sensor data to {output_file}")
    print(f"  - Total records: {len(full_dataset)}")
    print(f"  - Vehicles: {n_vehicles}")
    print(f"  - Timesteps per vehicle: {n_timesteps}")
    print("  - Anomaly rate: ~30%")

    # Summary statistics: a vehicle counts as anomalous if any row is flagged.
    summary = full_dataset.groupby('vehicle_id')['anomaly'].sum()
    vehicles_with_anomalies = (summary > 0).sum()
    print(f"  - Vehicles with anomalies: {vehicles_with_anomalies}/{n_vehicles}")

    return output_file


# Script entry point: when run directly (not imported), build the dataset in
# the default 'data/raw' directory.
if __name__ == '__main__':
    download_nasa_turbofan_data()