# monte-carlo-sim/mcp_server/prepare_data.py
# surfiniaburger's picture
# Setup Monte Carlo MCP Server with Git LFS
# 215dd01
import os
import pandas as pd
from data_handling.data_downloader import download_and_move_files
from data_handling.data_loader import unzip_data
from data_handling.telemetry_parser import parse_telemetry
def main():
    """
    Prepare the data for the TFX pipeline.

    Runs the download, unzip, and parse steps in order and saves the parsed
    result as a CSV file. The download and unzip steps are each skipped when
    their target directory (``data/`` and ``unzipped_data/`` next to this
    script) already exists. The parsed data is written to ``data.csv`` inside
    ``unzipped_data/barber-motorsports-park/barber``, the same directory that
    is handed to ``parse_telemetry``.

    Returns:
        int: A process exit status -- ``0`` when the CSV file was written,
        ``1`` when ``parse_telemetry`` returned ``None``.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(script_dir, 'data')
    unzipped_data_dir = os.path.join(script_dir, 'unzipped_data')
    csv_output_dir = os.path.join(
        unzipped_data_dir, 'barber-motorsports-park', 'barber'
    )

    # 1. Download and unzip data if not already present.
    # Only the existence of each directory is checked; its contents are not
    # validated, so a partial earlier run has to be cleaned up by hand.
    if not os.path.exists(data_dir):
        print("Downloading data...")
        download_and_move_files(data_dir)
    else:
        print("Data directory already exists.")

    if not os.path.exists(unzipped_data_dir):
        print("Unzipping data...")
        unzip_data(data_dir, unzipped_data_dir)
    else:
        print("Unzipped data directory already exists.")

    # 2. Parse the race data.
    print("Parsing telemetry data...")
    race_data = parse_telemetry(csv_output_dir)

    # Guard clause: report the failure through the return value as well as
    # the message, so callers and the shell can tell that nothing was written.
    if race_data is None:
        print("Failed to parse race data. Data preparation failed.")
        return 1

    # 3. Save the parsed data to a CSV file.
    csv_output_path = os.path.join(csv_output_dir, 'data.csv')
    print(f"Saving parsed data to {csv_output_path}...")
    race_data.to_csv(csv_output_path, index=False)
    print("Data preparation complete.")
    return 0


if __name__ == "__main__":
    # Propagate main()'s status as the process exit code so that a failed
    # preparation step is visible to whatever invoked the script.
    raise SystemExit(main())