import os

import pandas as pd

# Low-cardinality text columns that are loaded as pandas categoricals.
_CATEGORICAL_COLUMNS = (
    "Monitoring_Location_ID",
    "Activity_Depth_Unit",
    "Sample_Position",
    "Time_Zone",
    "Activity_Type",
    "Waterbody_Class",
    "WBID",
    "Name",
    "Sector",
    "Total_Depth_Unit",
    "Org_Analyte_Name",
)


def _parquet_path_for(file_path: str) -> str:
    """Return *file_path* with only its final extension swapped for ``.parquet``."""
    # splitext touches just the trailing extension, so ".csv" appearing in a
    # directory name is left alone, and a path that does not end in ".csv"
    # can never map back onto the source file itself.
    root, _extension = os.path.splitext(file_path)
    return f"{root}.parquet"


def _default_time_to_noon(value):
    """Append ``12:00:00`` to a date-only value; leave everything else as is."""
    if pd.isna(value):
        # Keep missing values missing rather than building "nan 12:00:00".
        return value
    # Ten characters or fewer is treated as a bare date with no time part.
    if len(str(value).strip()) <= 10:
        return f"{value} 12:00:00"
    return value


def _format_station_number(value):
    """Render a non-blank station number with exactly two decimal places."""
    if pd.notna(value) and str(value).strip() != "":
        return f"{float(value):.2f}"
    return value


def master_data_csv_to_parquet(file_path: str) -> None:
    """Convert a master data export from a CSV to a Parquet file.

    The Parquet file is written next to the source, using the same name with
    the final extension replaced by ``.parquet``. While converting:

    * ``Org_Result_Value`` becomes numeric; non-numeric entries such as
      ``"Not Reported"`` become NaN.
    * ``Activity_Start_Date_Time`` is parsed to datetimes; values of ten
      characters or fewer get ``12:00:00`` appended first, and unparseable
      values become NaT.
    * The text ``"none"`` is removed from ``Org_Result_Unit``.
    * ``Station_Number`` is read as text and re-formatted with two decimals.

    Args:
        file_path: Location of the CSV export to read.

    Raises:
        ValueError: If a non-blank ``Station_Number`` is not numeric.
    """
    save_path = _parquet_path_for(file_path)

    column_types = {"Station_Number": str}
    column_types.update(dict.fromkeys(_CATEGORICAL_COLUMNS, "category"))

    frame = pd.read_csv(file_path, dtype=column_types, low_memory=False)
    cleaned = frame.assign(
        # errors="coerce" already maps "Not Reported" (and any other
        # non-numeric text) to NaN, so no explicit replace is needed.
        Org_Result_Value=lambda df: pd.to_numeric(
            df["Org_Result_Value"], errors="coerce"
        ),
        Activity_Start_Date_Time=lambda df: pd.to_datetime(
            df["Activity_Start_Date_Time"].apply(_default_time_to_noon),
            errors="coerce",
        ),
        # Literal (non-regex) replacement, independent of the pandas default.
        Org_Result_Unit=lambda df: df["Org_Result_Unit"].str.replace(
            "none", "", regex=False
        ),
        Station_Number=lambda df: df["Station_Number"].apply(
            _format_station_number
        ),
    )
    cleaned.to_parquet(save_path)