Spaces:
Running
Running
# -*- coding: utf-8 -*-
"""
BTA Thickness Prediction — Prophet Time-Series Version
======================================================
Strategy: pure time-series forecasting using Facebook Prophet.
Predicts BTA thickness over time from dated thickness measurements only;
temperature data is not used.
Adheres to Clean Code principles.
"""
| import os | |
| import sys | |
| import glob | |
| import json | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from prophet import Prophet | |
| from prophet.serialize import model_to_json | |
# Thickness level (mm) used as the target when estimating the days remaining;
# also drawn as the red dashed line on the plot.
CRITICAL_THRESHOLD_MM: float = 115.0

# Thickness level (mm) drawn as the orange dotted line on the plot.
WARNING_THRESHOLD_MM: float = 130.0

# Number of periods forecast beyond the last measurement; also the value
# reported when the critical threshold is not reached within the forecast.
FORECAST_DAYS: int = 90

# File name preferred when the CSV is auto-discovered in the working directory.
DEFAULT_CSV_FILE: str = 'data-temp-clean.csv'
def main():
    """Run the end-to-end BTA thickness forecasting pipeline.

    Steps, in order: locate the input CSV, load and clean the measurements,
    fit the Prophet model, forecast ``FORECAST_DAYS`` periods ahead, estimate
    the days left until ``CRITICAL_THRESHOLD_MM``, print the summary and the
    chronological table, save the plot, and serialize the fitted model.

    Any exception raised along the way is reported on stderr and the process
    exits with status 1.
    """
    print("BTA Prophet Forecasting Model Initialization...")
    try:
        csv_path = get_target_csv_path()
        df_cleaned = load_and_clean_data(csv_path)
        if df_cleaned.empty:
            # Without this guard the `.iloc[-1]` lookup below fails with an
            # opaque "indexer is out-of-bounds" message.
            raise ValueError(
                f"No valid measurement rows (date + thickness) found in '{csv_path}'."
            )

        print(f"Loaded {len(df_cleaned)} actual manual measurement points.")
        print(f" Date range: {df_cleaned['tanggal_parsed'].min().date()} to {df_cleaned['tanggal_parsed'].max().date()}")
        print(f" Current thickness: {df_cleaned['ketebalan_parsed'].iloc[-1]} mm")

        prophet_df = prepare_prophet_dataframe(df_cleaned)
        model = train_prophet_model(prophet_df)
        forecast = forecast_thickness(model, days=FORECAST_DAYS)

        # Remaining days are counted from the last actual measurement date,
        # not from the wall-clock date on which the script runs.
        last_measurement_date = prophet_df['ds'].max()
        days_remaining = estimate_days_to_threshold(
            forecast_df=forecast,
            current_date=last_measurement_date,
            threshold=CRITICAL_THRESHOLD_MM,
        )

        print_forecast_summary(forecast, days_remaining)
        print_forecast_table(historical_df=prophet_df, forecast_df=forecast)

        output_image_path = 'bta_prophet_predictions.png'
        plot_and_save_forecast(prophet_df, forecast, output_image_path)
        print(f"Prediction plot saved to '{output_image_path}'.")

        output_model_path = 'model_prophet_bta.json'
        save_model_json(model, output_model_path)
        print(f"Prophet model serialized and saved to '{output_model_path}'.")
    except Exception as error:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"Error during model execution: {error}", file=sys.stderr)
        sys.exit(1)
def get_target_csv_path() -> str:
    """Return the path of the CSV file to process.

    If a command-line argument is present, it is used as the path and must
    exist. Otherwise the current directory is searched for ``*.csv`` files:
    the default file name (or its doubled-extension variant) wins when
    present, else the alphabetically first CSV is returned so the choice is
    the same on every run.

    Returns:
        The path given on the command line, or a file name relative to the
        current working directory.

    Raises:
        FileNotFoundError: If the given path does not exist, or no CSV file
            is found in the current directory.
    """
    if len(sys.argv) > 1:
        provided_path = sys.argv[1]
        if not os.path.exists(provided_path):
            raise FileNotFoundError(f"Provided CSV file '{provided_path}' does not exist.")
        return provided_path

    # '*.csv' already matches doubled extensions such as 'name.csv.csv', so a
    # single glob suffices. Sorting makes the fallback choice deterministic.
    csv_files = sorted(glob.glob('*.csv'))
    if not csv_files:
        raise FileNotFoundError("No CSV files found in the current directory.")

    # Prefer the standard name and its known variations, in this order.
    preferred_names = (DEFAULT_CSV_FILE, DEFAULT_CSV_FILE + '.csv', 'data-temp-clean.csv.csv')
    for name in preferred_names:
        if name in csv_files:
            return name
    return csv_files[0]
def load_and_clean_data(file_path: str) -> pd.DataFrame:
    """Load the measurement CSV and keep only rows with a valid date and thickness.

    Column headers are stripped of surrounding whitespace. ``Tanggal`` and
    ``Ketebalan BTA (mm)`` are renamed to ``tanggal_raw`` / ``ketebalan_raw``
    and parsed into ``tanggal_parsed`` (datetime) and ``ketebalan_parsed``
    (numeric). Values that fail to parse become missing and their rows are
    dropped.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The cleaned rows sorted by ``tanggal_parsed`` with a fresh 0..n-1 index.

    Raises:
        KeyError: If the date or thickness column is missing from the file.
    """
    df = pd.read_csv(file_path)
    df.columns = [str(col).strip() for col in df.columns]

    # CSV header -> internal working name.
    column_aliases = {
        'Tanggal': 'tanggal_raw',
        'Ketebalan BTA (mm)': 'ketebalan_raw',
    }
    df = df.rename(columns=column_aliases)

    # Checked after the rename so files that already use the internal names
    # keep working. The error names the CSV headers the user must supply.
    missing = [header for header, alias in column_aliases.items() if alias not in df.columns]
    if missing:
        raise KeyError(
            f"CSV file '{file_path}' is missing required column(s): {', '.join(missing)}"
        )

    # Unparseable values are coerced to NaN/NaT so they can be dropped below.
    df['ketebalan_parsed'] = pd.to_numeric(df['ketebalan_raw'], errors='coerce')
    df['tanggal_parsed'] = pd.to_datetime(df['tanggal_raw'], errors='coerce')

    # Keep only rows that carry both a valid date and a valid measurement.
    cleaned_df = df.dropna(subset=['tanggal_parsed', 'ketebalan_parsed'])
    return cleaned_df.sort_values('tanggal_parsed').reset_index(drop=True)
def prepare_prophet_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Return a two-column frame named the way Prophet expects.

    ``tanggal_parsed`` becomes ``ds`` and ``ketebalan_parsed`` becomes ``y``.
    All other columns are left out and the input frame is not modified.
    """
    prophet_names = {'tanggal_parsed': 'ds', 'ketebalan_parsed': 'y'}
    selected = df[['tanggal_parsed', 'ketebalan_parsed']]
    return selected.rename(columns=prophet_names)
def train_prophet_model(df: pd.DataFrame) -> Prophet:
    """Fit and return a linear-growth Prophet model with no seasonal components.

    Args:
        df: Training frame with the ``ds`` / ``y`` columns Prophet expects.

    Returns:
        The fitted model.
    """
    # Design assumption carried over from the original author: BTA wear is
    # monotonic and non-seasonal, so every seasonal component is switched off.
    seasonality_switches = {
        'yearly_seasonality': False,
        'weekly_seasonality': False,
        'daily_seasonality': False,
    }
    forecaster = Prophet(growth='linear', **seasonality_switches)
    forecaster.fit(df)
    return forecaster
def forecast_thickness(model: Prophet, days: int) -> pd.DataFrame:
    """Predict thickness over the model's timeline extended by ``days`` periods.

    Args:
        model: A fitted Prophet model.
        days: Number of future periods to append to the timeline.

    Returns:
        The frame produced by ``model.predict`` for the extended timeline.
    """
    extended_timeline = model.make_future_dataframe(periods=days)
    predictions = model.predict(extended_timeline)
    return predictions
def estimate_days_to_threshold(forecast_df: pd.DataFrame, current_date: pd.Timestamp, threshold: float) -> int:
    """Return the days from ``current_date`` until the forecast reaches ``threshold``.

    Only forecast rows dated on or after ``current_date`` are considered.
    A fitted value in the past that dipped to the threshold must not make the
    estimate collapse to 0 while the current and future values are still
    above it.

    Args:
        forecast_df: Forecast frame with ``ds`` (date) and ``yhat`` (predicted
            thickness) columns.
        current_date: Date to count from.
        threshold: Thickness level of interest.

    Returns:
        Whole days until the first row on or after ``current_date`` whose
        ``yhat`` is at or below ``threshold``. This is 0 when the value at
        ``current_date`` already qualifies. Returns ``FORECAST_DAYS`` when no
        such row exists.
    """
    not_in_past = forecast_df['ds'] >= current_date
    at_or_below = forecast_df['yhat'] <= threshold
    critical_dates = forecast_df.loc[not_in_past & at_or_below, 'ds']

    if critical_dates.empty:
        return FORECAST_DAYS

    days_remaining = (critical_dates.min() - current_date).days
    # Non-negative by construction; the clamp is kept as a cheap safeguard.
    return max(0, days_remaining)
def print_forecast_summary(forecast_df: pd.DataFrame, days_remaining: int):
    """Print a short text report for the final row of the forecast.

    Args:
        forecast_df: Forecast frame with ``ds``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper`` columns; its last row is the one reported.
        days_remaining: Estimated days until the critical threshold.
    """
    divider = "=" * 50
    final_row = forecast_df.iloc[-1]
    target_date = final_row['ds'].date()
    thickness = final_row['yhat']
    lower_bound = final_row['yhat_lower']
    upper_bound = final_row['yhat_upper']

    print("\n" + divider)
    print(f"PROPHET FORECAST RESULTS (Next {FORECAST_DAYS} days)")
    print(divider)
    print(f" Target Date : {target_date}")
    print(f" Predicted Thickness : {thickness:.2f} mm")
    print(f" Confidence Interval : [{lower_bound:.2f} - {upper_bound:.2f}] mm")
    print(f" Estimated Days to {CRITICAL_THRESHOLD_MM}mm: about {days_remaining} days")
    print(divider + "\n")
def print_forecast_table(historical_df: pd.DataFrame, forecast_df: pd.DataFrame):
    """Print one chronological table of actual measurements and predictions.

    Each forecast row is shown with its prediction and bounds. The actual
    measurement is shown where ``historical_df`` has a row for the same date,
    and ``-`` otherwise.

    Args:
        historical_df: Actual measurements with ``ds`` and ``y`` columns.
        forecast_df: Forecast with ``ds``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper`` columns.
    """
    # Left join keeps every forecast row; dates with no measurement get NaN.
    merged_df = pd.merge(
        forecast_df[['ds', 'yhat', 'yhat_lower', 'yhat_upper']],
        historical_df[['ds', 'y']],
        on='ds',
        how='left',
    )

    display_df = merged_df.rename(columns={
        'ds': 'Date',
        'y': 'Actual (mm)',
        'yhat': 'Predicted (mm)',
        'yhat_lower': 'Lower Bound (mm)',
        'yhat_upper': 'Upper Bound (mm)',
    })

    # Show calendar dates only, without the time component.
    display_df['Date'] = display_df['Date'].dt.date

    for column in ['Predicted (mm)', 'Lower Bound (mm)', 'Upper Bound (mm)']:
        display_df[column] = display_df[column].round(2)

    # Missing actual values are rendered as '-' for a clean table.
    display_df['Actual (mm)'] = display_df['Actual (mm)'].apply(
        lambda val: f"{val:.1f}" if pd.notna(val) else "-"
    )

    # Put the actual value right next to the date.
    column_order = ['Date', 'Actual (mm)', 'Predicted (mm)', 'Lower Bound (mm)', 'Upper Bound (mm)']
    display_df = display_df[column_order]

    # `to_string()` renders every row, so no global pandas display option
    # needs to be changed to avoid truncation.
    print("CHRONOLOGICAL BTA THICKNESS DATA & FORECAST (Runtut):")
    print(display_df.to_string(index=False))
    print("=" * 50 + "\n")
def plot_and_save_forecast(historical_df: pd.DataFrame, forecast_df: pd.DataFrame, output_path: str):
    """Render the measurements, forecast and thresholds, and save the image.

    Args:
        historical_df: Actual measurements with ``ds`` and ``y`` columns.
        forecast_df: Forecast with ``ds``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper`` columns.
        output_path: Destination image path.
    """
    sns.set_theme(style='whitegrid')
    fig, ax = plt.subplots(figsize=(14, 7))
    try:
        # Historical actual measurements.
        ax.scatter(
            historical_df['ds'],
            historical_df['y'],
            color='royalblue',
            s=70,
            label='Actual Measurement (Manual)',
            zorder=5,
        )
        # Predicted values.
        ax.plot(
            forecast_df['ds'],
            forecast_df['yhat'],
            color='darkorange',
            linewidth=2,
            label='Predicted Trend (Prophet)',
            zorder=4,
        )
        # Uncertainty band between the lower and upper forecast bounds.
        ax.fill_between(
            forecast_df['ds'],
            forecast_df['yhat_lower'],
            forecast_df['yhat_upper'],
            color='darkorange',
            alpha=0.15,
            label='Uncertainty Interval (Confidence Interval)',
        )
        # Operational thresholds.
        ax.axhline(
            y=CRITICAL_THRESHOLD_MM,
            color='red',
            linestyle='--',
            linewidth=1.5,
            label=f'Critical Threshold ({CRITICAL_THRESHOLD_MM} mm)',
        )
        ax.axhline(
            y=WARNING_THRESHOLD_MM,
            color='orange',
            linestyle=':',
            linewidth=1.5,
            label=f'Warning Threshold ({WARNING_THRESHOLD_MM} mm)',
        )
        # Vertical marker at the last actual measurement.
        last_actual_date = historical_df['ds'].max()
        ax.axvline(
            x=last_actual_date,
            color='gray',
            linestyle=':',
            alpha=0.8,
            label='Last Known Measurement',
        )
        ax.set_title('BTA Thickness Forecasting — Prophet Time-Series Model', fontsize=14, fontweight='bold')
        ax.set_ylabel('Thickness (mm)')
        ax.set_xlabel('Date')
        ax.legend(loc='upper right', frameon=True)
        # Fixed vertical range; values outside 90-245 mm fall off the chart.
        ax.set_ylim(90, 245)
        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        # Release the figure even when drawing or saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)
def save_model_json(model: Prophet, filepath: str):
    """Write the serialized Prophet model to ``filepath``.

    The value returned by ``model_to_json`` is itself passed through the
    ``json`` encoder before being written.

    NOTE(review): ``model_to_json`` presumably already returns a JSON string,
    in which case the file holds a JSON-encoded string and a reader needs
    ``json.load`` before ``model_from_json`` — confirm against the loader.
    """
    with open(filepath, 'w') as out_file:
        payload = json.dumps(model_to_json(model))
        out_file.write(payload)
# Script entry point: run the pipeline only when executed directly, not when
# the module is imported.
if __name__ == '__main__':
    main()