enginevibration / generate_data.py
GitHub Actions
Deploy from GitHub Actions
a1edd1c
# --- generate_data.py (Regression Version) ---
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta
def generate_data(filename="raw_data.csv", total_rows=10000, seed=None):
    """Generate a synthetic vibration regression dataset and save it as CSV.

    Each row holds randomly drawn operating conditions (RPM, ambient
    temperature, fuel level, sea state) together with a vibration value
    computed from them: a quadratic RPM term, a linear temperature term,
    sea-state-scaled Gaussian noise, and a linear trend that grows from
    0 to 3.0 across the rows.

    Args:
        filename: Name of the CSV file, resolved relative to the ``data``
            directory. It may contain sub-directories; they are created.
        total_rows: Number of rows to generate.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the global ``np.random`` state is used, so callers
            that seed it themselves via ``np.random.seed`` are unaffected.

    Returns:
        None. The dataset is written to disk and progress is printed.
    """
    data_dir = 'data'
    os.makedirs(data_dir, exist_ok=True)
    filepath = os.path.join(data_dir, filename)

    # ``filename`` may carry its own sub-directory; make sure the folder
    # that will actually receive the file exists before writing.
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    print(f"Generating regression dataset with {total_rows} rows...")

    # The ``np.random`` module and a ``RandomState`` instance expose the same
    # sampling methods, so one code path serves both the seeded and the
    # unseeded case. The draw order below is kept fixed on purpose: changing
    # it would change the numbers produced for a given seed.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Simulate different conditions
    rpms = rng.randint(800, 4500, total_rows)  # upper bound is exclusive
    ambient_temps = rng.uniform(28, 35, total_rows)  # Tropical temps
    # Fuel level (percentage) is recorded as a feature but does not feed
    # into the vibration formula below.
    fuel_levels = rng.uniform(10, 100, total_rows)
    # 0=Calm, 1=Choppy, 2=Stormy
    sea_states = rng.choice([0, 1, 2], total_rows, p=[0.6, 0.3, 0.1])

    # --- The Core Logic: Vibration is a function of conditions ---
    # Base vibration increases with the square of RPM
    base_vibration = 9.8 + (rpms / 1000)**2
    # Temperature adds a small linear effect
    temp_effect = (ambient_temps - 28) * 0.05
    # Sea state adds random noise; the standard deviation scales with the
    # state, so calm rows (state 0) receive exactly zero noise.
    sea_effect = rng.normal(0, sea_states * 0.2)
    # A developing fault adds a slow, growing trend over the row order
    fault_trend = np.linspace(0, 1, total_rows) * 3.0

    # Combine all effects to get the final measured vibration
    az_vibration = base_vibration + temp_effect + sea_effect + fault_trend

    df = pd.DataFrame({
        'rpm': rpms,
        'ambient_temp_c': ambient_temps,
        'fuel_level_percent': fuel_levels,
        'sea_state': sea_states,
        'az_vibration_actual': az_vibration
    })
    df.to_csv(filepath, index=False)
    print(f"Successfully generated and saved data to '{filepath}'")
# Script entry point: when this file is executed directly (not imported),
# build the dataset using the default filename and row count.
if __name__ == "__main__":
    generate_data()