"""Backend facade for the course-enrollment prediction system.

Wires together data loading (``DataProcessor``), the Prophet-based predictor
(``ProphetPredictor``) and backtest evaluation (``Evaluator``), and shapes
their output into display-ready result objects.
"""

import logging
from dataclasses import dataclass
from typing import Dict, Optional, Tuple

import pandas as pd

from config import Config
from data_processor import DataProcessor
from evaluator import Evaluator
from prophet_predictor import ProphetPredictor
from utils import setup_logging

setup_logging("INFO")
logger = logging.getLogger("Backend")

# Inclusive range of target years accepted by ``generate_predictions``.
_MIN_PREDICT_YEAR = 2020
_MAX_PREDICT_YEAR = 2030


def _default_metrics() -> Dict:
    """Return a fresh placeholder metrics dict for when no backtest exists."""
    return {"mae": 0, "rmse": 0}


@dataclass
class PredictionResult:
    """Outcome of ``PredictionBackend.generate_predictions``."""

    # Aggregate figures for the requested term (empty on error).
    summary_data: Dict
    # Display-ready table of courses recommended to open.
    predictions_df: pd.DataFrame
    # Prediction-vs-actual table, or None when nothing can be compared.
    comparison_df: Optional[pd.DataFrame]
    # True when enrollment rows exist for the requested year/semester.
    has_actual_data: bool
    # Human-readable error message; None on success.
    error: Optional[str] = None


@dataclass
class ForecastResult:
    """Outcome of ``PredictionBackend.generate_multi_year_forecast``."""

    # Aggregate figures for the forecast horizon (empty on error).
    summary_data: Dict
    # Display-ready per-course, per-year forecast table.
    forecast_df: pd.DataFrame
    # One row per forecast year with summed predictions/classes/capacity.
    yearly_summary: pd.DataFrame
    # Human-readable error message; None on success.
    error: Optional[str] = None


class PredictionBackend:
    """Coordinates data loading, model training and prediction reporting."""

    def __init__(self) -> None:
        """Create an uninitialized backend; call ``initialize`` before use."""
        self._processor: Optional[DataProcessor] = None
        self._predictor: Optional[ProphetPredictor] = None
        self._config: Optional[Config] = None
        self._df_enrollment: Optional[pd.DataFrame] = None
        self._elective_codes: Optional[set] = None
        self._backtest_metrics: Optional[dict] = None
        self._initialized: bool = False

    @property
    def is_initialized(self) -> bool:
        """Whether the last call to ``initialize`` completed successfully."""
        return self._initialized

    @property
    def config(self) -> Optional[Config]:
        """The active configuration, or None before initialization."""
        return self._config

    def initialize(self) -> bool:
        """Load the data and train the student-population model.

        Returns:
            True on success; False if any step raised (the error is logged
            with its traceback and the backend is marked uninitialized).
        """
        try:
            logger.info("Initializing prediction system...")
            # Metrics cached from a previous initialization would describe
            # stale data/models, so force the backtest to run again.
            self._backtest_metrics = None
            self._config = Config()
            self._processor = DataProcessor(self._config)
            self._df_enrollment, self._elective_codes = (
                self._processor.load_and_process()
            )
            self._predictor = ProphetPredictor(self._config)
            self._predictor.train_student_population_model(
                self._processor.raw_data["students_yearly"]
            )
            self._initialized = True
            logger.info("System initialized successfully")
            return True
        except Exception as e:
            logger.exception("Failed to initialize system: %s", e)
            self._initialized = False
            return False

    def get_data_info(self) -> Dict:
        """Return basic statistics about the loaded data.

        Returns:
            A dict with ``total_courses``, ``elective_courses``,
            ``class_capacity``, ``year_min`` and ``year_max``; or a dict with
            a single ``error`` key when the data is unavailable.
        """
        if not self._initialized or self._processor is None or self._config is None:
            return {"error": "System not initialized"}
        try:
            courses = self._processor.raw_data.get("courses")
            students = self._processor.raw_data.get("students_yearly")
            if courses is None or students is None:
                return {"error": "Data not loaded"}
            elective_courses = courses[courses["kategori_mk"] == "P"]
            return {
                "total_courses": len(courses),
                "elective_courses": len(elective_courses),
                "class_capacity": self._config.class_capacity.DEFAULT_CLASS_CAPACITY,
                "year_min": int(students["thn"].min()),
                "year_max": int(students["thn"].max()),
            }
        except Exception as e:
            # Report the failure to the caller but keep a traceback in the log.
            logger.exception("Failed to collect data info: %s", e)
            return {"error": str(e)}

    def _require_components(
        self,
    ) -> Tuple[Config, ProphetPredictor, DataProcessor, pd.DataFrame, set]:
        """Return the loaded components, raising if any of them is missing.

        An explicit check is used instead of ``assert`` so that it still runs
        under ``python -O`` and always produces a non-empty error message.

        Raises:
            RuntimeError: if any component has not been loaded.
        """
        if (
            self._config is None
            or self._predictor is None
            or self._processor is None
            or self._df_enrollment is None
            or self._elective_codes is None
        ):
            raise RuntimeError(
                "Prediction components are not loaded; call initialize() first."
            )
        return (
            self._config,
            self._predictor,
            self._processor,
            self._df_enrollment,
            self._elective_codes,
        )

    def _run_backtest_if_needed(self) -> Dict:
        """Return backtest metrics, running the backtest on first use.

        Results (including the placeholder used when the backtest yields
        nothing) are cached on the instance. When the system is not
        initialized the placeholder is returned *without* caching, so a later
        successful initialization still triggers a real backtest.
        """
        if self._backtest_metrics is not None:
            return self._backtest_metrics

        if (
            self._config is None
            or self._df_enrollment is None
            or self._predictor is None
        ):
            logger.warning("System not initialized, using default metrics")
            return _default_metrics()

        logger.info("Running backtest for the first time...")
        evaluator = Evaluator(self._config)
        backtest_results = evaluator.run_backtest(self._df_enrollment, self._predictor)

        metrics_result = None
        if backtest_results is None or len(backtest_results) == 0:
            logger.warning("Backtest returned no results, using defaults")
        else:
            metrics_result = evaluator.generate_metrics(backtest_results)
            if metrics_result is None:
                logger.warning("Metrics calculation failed, using defaults")

        self._backtest_metrics = (
            metrics_result if metrics_result is not None else _default_metrics()
        )
        return self._backtest_metrics

    def _get_actual_data(self, year: int, semester: int) -> Tuple[pd.DataFrame, bool]:
        """Select enrollment rows whose ``thn``/``smt`` match the given term.

        Returns:
            ``(rows, found)`` where ``found`` is True when at least one row
            matched. An empty frame and False are returned if no enrollment
            data is loaded.
        """
        if self._df_enrollment is None:
            return pd.DataFrame(), False
        term_mask = (self._df_enrollment["thn"] == year) & (
            self._df_enrollment["smt"] == semester
        )
        actual_data = self._df_enrollment[term_mask]
        return actual_data, len(actual_data) > 0

    def _calculate_class_metrics(
        self,
        courses_with_actual: pd.DataFrame,
        year: int,
        semester: int,
    ) -> Dict:
        """Score predicted class counts against a reference class count.

        The reference is the class count reported by the processor's
        validation lookup when it has values for these courses (data source
        ``"tabel2"``); otherwise it is derived from the actual enrollment via
        ``Config.calculate_classes_needed`` (data source ``"kalkulasi"``).

        Args:
            courses_with_actual: rows with ``kode_mk``, ``classes_needed`` and
                ``actual_enrollment`` columns. Not modified.
            year: term year passed to the validation lookup.
            semester: term semester passed to the validation lookup.

        Returns:
            A dict with exact-match and within-one counts and percentages,
            the number of courses scored, and the data source used.
        """
        empty_metrics = {
            "class_matches": 0,
            "class_within_one": 0,
            "total_for_class_accuracy": 0,
            "class_accuracy_pct": 0,
            "class_within_one_pct": 0,
            "has_actual_class_data": False,
            "data_source": "kalkulasi",
        }
        if self._processor is None or self._config is None:
            return empty_metrics
        if len(courses_with_actual) == 0:
            # Nothing to score; also avoids a row-wise apply on an empty frame.
            return empty_metrics

        config = self._config
        actual_classes_df = self._processor.get_class_count_for_validation(
            year, semester
        )

        has_actual_class_data = False
        if len(actual_classes_df) > 0:
            courses_with_actual = courses_with_actual.merge(
                actual_classes_df, on="kode_mk", how="left"
            )
            # bool() keeps a numpy.bool_ out of the returned summary.
            has_actual_class_data = bool(
                courses_with_actual["actual_classes"].notna().any()
            )

        if has_actual_class_data:
            # Score only the courses that have a recorded class count.
            scored = courses_with_actual[
                courses_with_actual["actual_classes"].notna()
            ].copy()
            reference_classes = scored["actual_classes"].astype(int)
        else:
            scored = courses_with_actual
            reference_classes = scored.apply(
                lambda row: config.calculate_classes_needed(
                    row["actual_enrollment"],
                    row["kode_mk"],
                    has_historical_data=True,
                ),
                axis=1,
            )

        predicted_classes = scored["classes_needed"]
        total_for_class_accuracy = len(scored)
        class_matches = int((predicted_classes == reference_classes).sum())
        class_within_one = int(
            (abs(predicted_classes - reference_classes) <= 1).sum()
        )

        if total_for_class_accuracy > 0:
            class_accuracy_pct = (class_matches / total_for_class_accuracy) * 100
            class_within_one_pct = (class_within_one / total_for_class_accuracy) * 100
        else:
            class_accuracy_pct = 0
            class_within_one_pct = 0

        return {
            "class_matches": class_matches,
            "class_within_one": class_within_one,
            "total_for_class_accuracy": total_for_class_accuracy,
            "class_accuracy_pct": class_accuracy_pct,
            "class_within_one_pct": class_within_one_pct,
            "has_actual_class_data": has_actual_class_data,
            "data_source": "tabel2" if has_actual_class_data else "kalkulasi",
        }

    def _prepare_comparison_table(
        self,
        predictions: pd.DataFrame,
        actual_data: pd.DataFrame,
        year: int,
        semester: int,
    ) -> Optional[pd.DataFrame]:
        """Build the display table comparing predictions with actual data.

        Args:
            predictions: prediction rows (needs ``kode_mk``, ``nama_mk``,
                ``predicted_enrollment``, ``classes_needed``, ``strategy``).
            actual_data: enrollment rows with ``kode_mk`` and ``enrollment``.
            year: term year passed to the class-count validation lookup.
            semester: term semester passed to the same lookup.

        Returns:
            A frame sorted by actual enrollment (descending) with display
            column names, or None when the backend is not loaded or no
            predicted course has actual enrollment.
        """
        if self._processor is None or self._config is None:
            return None
        config = self._config

        comparison = predictions.merge(
            actual_data[["kode_mk", "enrollment"]], on="kode_mk", how="left"
        )
        comparison = comparison.rename(columns={"enrollment": "actual_enrollment"})

        actual_classes_df = self._processor.get_class_count_for_validation(
            year, semester
        )
        if len(actual_classes_df) > 0:
            comparison = comparison.merge(actual_classes_df, on="kode_mk", how="left")
        else:
            comparison["actual_classes"] = None

        courses_with_actual = comparison[comparison["actual_enrollment"].notna()].copy()
        if len(courses_with_actual) == 0:
            return None

        courses_with_actual["error"] = (
            courses_with_actual["predicted_enrollment"]
            - courses_with_actual["actual_enrollment"]
        )
        courses_with_actual["abs_error"] = abs(courses_with_actual["error"])
        # A zero actual enrollment is replaced by 1 to avoid dividing by zero.
        courses_with_actual["accuracy_%"] = 100 * (
            1
            - courses_with_actual["abs_error"]
            / courses_with_actual["actual_enrollment"].replace(0, 1)
        )

        has_recorded_column = "actual_classes" in courses_with_actual.columns

        def resolve_actual_classes(row):
            # Prefer the recorded class count; otherwise derive it from the
            # actual enrollment.
            recorded = row["actual_classes"] if has_recorded_column else None
            if pd.notna(recorded):
                return int(recorded)
            return config.calculate_classes_needed(
                row["actual_enrollment"],
                row["kode_mk"],
                has_historical_data=True,
            )

        courses_with_actual["actual_classes"] = courses_with_actual.apply(
            resolve_actual_classes, axis=1
        )
        courses_with_actual["class_diff"] = (
            courses_with_actual["classes_needed"]
            - courses_with_actual["actual_classes"]
        )

        display_names = {
            "kode_mk": "Kode MK",
            "nama_mk": "Nama MK",
            "actual_enrollment": "Aktual",
            "predicted_enrollment": "Prediksi",
            "actual_classes": "Kelas Aktual",
            "classes_needed": "Kelas Prediksi",
            "class_diff": "Selisih Kelas",
            "error": "Error",
            "accuracy_%": "Akurasi %",
            "strategy": "Strategy",
        }
        comparison_display = (
            courses_with_actual[list(display_names)]
            .copy()
            .rename(columns=display_names)
        )

        comparison_display["Aktual"] = comparison_display["Aktual"].astype(int)
        for rounded_column in ("Prediksi", "Error", "Akurasi %"):
            comparison_display[rounded_column] = comparison_display[
                rounded_column
            ].round(1)
        for int_column in ("Kelas Aktual", "Kelas Prediksi", "Selisih Kelas"):
            comparison_display[int_column] = comparison_display[int_column].astype(
                int
            )
        return comparison_display.sort_values("Aktual", ascending=False)

    def _prepare_predictions_display(self, predictions: pd.DataFrame) -> pd.DataFrame:
        """Prepare predictions dataframe for display.

        Keeps only rows whose confidence is ``"high"`` and whose
        recommendation is ``"BUKA"``, sorted by predicted enrollment
        (descending); the confidence and status columns are dropped from the
        returned frame.
        """
        display_names = {
            "kode_mk": "Kode MK",
            "nama_mk": "Nama MK",
            "predicted_enrollment": "Prediksi",
            "classes_needed": "Jumlah Kelas",
            "class_capacity": "Kapasitas/Kelas",
            "total_quota": "Total Kuota",
            "utilization_pct": "Utilization %",
            "recommendation": "Status",
            "confidence": "Confidence",
            "strategy": "Strategy",
        }
        display_df = (
            predictions[list(display_names)].copy().rename(columns=display_names)
        )
        display_df["Prediksi"] = display_df["Prediksi"].round(1)
        display_df["Jumlah Kelas"] = display_df["Jumlah Kelas"].astype(int)
        display_df["Total Kuota"] = display_df["Total Kuota"].astype(int)

        keep_mask = (display_df["Confidence"] == "high") & (
            display_df["Status"] == "BUKA"
        )
        display_df = display_df[keep_mask].sort_values("Prediksi", ascending=False)
        return display_df.drop(columns=["Confidence", "Status"])

    @staticmethod
    def _prediction_error(message: str) -> PredictionResult:
        """Build an empty ``PredictionResult`` carrying ``message`` as error."""
        return PredictionResult(
            summary_data={},
            predictions_df=pd.DataFrame(),
            comparison_df=None,
            has_actual_data=False,
            error=message,
        )

    @staticmethod
    def _forecast_error(message: str) -> ForecastResult:
        """Build an empty ``ForecastResult`` carrying ``message`` as error."""
        return ForecastResult(
            summary_data={},
            forecast_df=pd.DataFrame(),
            yearly_summary=pd.DataFrame(),
            error=message,
        )

    def _comparison_summary(
        self,
        predictions: pd.DataFrame,
        actual_data: pd.DataFrame,
        year: int,
        semester: int,
    ) -> Dict:
        """Compute prediction-vs-actual summary statistics for one term.

        Returns:
            A dict with MAE, RMSE, actual/predicted totals, aggregate accuracy
            and the class metrics; empty when no predicted course has actual
            enrollment.
        """
        comparison = predictions.merge(
            actual_data[["kode_mk", "enrollment"]], on="kode_mk", how="left"
        ).rename(columns={"enrollment": "actual_enrollment"})
        courses_with_actual = comparison[
            comparison["actual_enrollment"].notna()
        ].copy()
        if len(courses_with_actual) == 0:
            return {}

        residuals = (
            courses_with_actual["predicted_enrollment"]
            - courses_with_actual["actual_enrollment"]
        )
        total_actual = courses_with_actual["actual_enrollment"].sum()
        total_predicted = courses_with_actual["predicted_enrollment"].sum()

        # Guard the division: a zero actual total would otherwise put
        # inf/NaN into the summary without raising.
        if total_actual != 0:
            accuracy_pct = (
                1 - abs(total_predicted - total_actual) / total_actual
            ) * 100
        else:
            accuracy_pct = 0

        class_metrics = self._calculate_class_metrics(
            courses_with_actual.copy(), year, semester
        )
        return {
            "comparison_mae": abs(residuals).mean(),
            "comparison_rmse": (residuals**2).mean() ** 0.5,
            "total_actual": total_actual,
            "total_predicted": total_predicted,
            "accuracy_pct": accuracy_pct,
            **class_metrics,
        }

    def generate_predictions(self, year: int, semester: int) -> PredictionResult:
        """Generate per-course predictions for one term.

        Args:
            year: target year; must lie in 2020..2030 inclusive.
            semester: 1 or 2.

        Returns:
            A ``PredictionResult``. Validation failures and any exception
            raised while predicting are reported through ``error`` rather
            than raised.

        Side effects:
            Sets ``PREDICT_YEAR``/``PREDICT_SEMESTER`` on the active config
            and may run (and cache) the backtest.
        """
        if semester not in (1, 2):
            return self._prediction_error("Semester harus 1 (Ganjil) atau 2 (Genap)")
        if not _MIN_PREDICT_YEAR <= year <= _MAX_PREDICT_YEAR:
            return self._prediction_error(
                f"Year must be between {_MIN_PREDICT_YEAR} and {_MAX_PREDICT_YEAR}"
            )
        if not self._initialized:
            return self._prediction_error(
                "System not initialized. Please restart the app."
            )

        try:
            logger.info("Generating predictions for %s Semester %s...", year, semester)
            config, predictor, processor, df_enrollment, elective_codes = (
                self._require_components()
            )
            config.prediction.PREDICT_YEAR = year
            config.prediction.PREDICT_SEMESTER = semester

            actual_data, has_actual_data = self._get_actual_data(year, semester)
            if has_actual_data:
                logger.info(
                    "Found actual enrollment data for %s Semester %s", year, semester
                )
            else:
                logger.info("No actual data for %s Semester %s", year, semester)

            metrics = self._run_backtest_if_needed()
            predictions = predictor.generate_batch_predictions(
                df_enrollment,
                processor.raw_data["courses"],
                elective_codes,
                year,
                semester,
            )

            open_courses = predictions[predictions["recommendation"] == "BUKA"]
            summary_data = {
                "year": year,
                "semester": semester,
                "semester_name": "1 (Ganjil)" if semester == 1 else "2 (Genap)",
                "total_to_open": len(open_courses),
                "total_classes": int(open_courses["classes_needed"].sum()),
                "total_predicted_students": int(
                    open_courses["predicted_enrollment"].sum()
                ),
                "total_capacity": int(open_courses["total_quota"].sum()),
                "class_capacity": config.class_capacity.DEFAULT_CLASS_CAPACITY,
                "metrics": metrics,
                "has_actual_data": has_actual_data,
            }

            comparison_df = None
            if has_actual_data:
                comparison_stats = self._comparison_summary(
                    predictions, actual_data, year, semester
                )
                # Empty stats mean no predicted course has actual enrollment,
                # so there is no comparison table to build either.
                if comparison_stats:
                    summary_data.update(comparison_stats)
                    comparison_df = self._prepare_comparison_table(
                        predictions, actual_data, year, semester
                    )

            return PredictionResult(
                summary_data=summary_data,
                predictions_df=self._prepare_predictions_display(predictions),
                comparison_df=comparison_df,
                has_actual_data=has_actual_data,
            )
        except Exception as e:
            logger.exception("Error generating predictions: %s", e)
            return self._prediction_error(str(e))

    def generate_multi_year_forecast(
        self, year: int, semester: int, years_ahead: int = 3
    ) -> ForecastResult:
        """Generate a multi-year forecast starting from ``year``.

        Args:
            year: first forecast year, passed through to the predictor.
            semester: semester passed to the predictor; labelled "Ganjil"
                when 1 and "Genap" otherwise.
            years_ahead: forecast horizon passed to the predictor.

        Returns:
            A ``ForecastResult``. Failures (uninitialized system, an empty
            forecast, or any exception) are reported through ``error`` rather
            than raised.
        """
        if not self._initialized:
            return self._forecast_error("System not initialized.")

        try:
            logger.info("Generating %s-year forecast from %s...", years_ahead, year)
            config, predictor, processor, df_enrollment, elective_codes = (
                self._require_components()
            )
            forecast_df = predictor.generate_multi_year_forecast(
                df_enrollment,
                processor.raw_data["courses"],
                elective_codes,
                year,
                semester,
                years_ahead,
            )
            if forecast_df.empty:
                return self._forecast_error("Tidak ada data untuk forecast.")

            yearly_summary = (
                forecast_df.groupby("year")
                .agg(
                    {
                        "predicted_enrollment": "sum",
                        "classes_needed": "sum",
                        "total_capacity": "sum",
                        "kode_mk": "count",
                    }
                )
                .reset_index()
                .rename(
                    columns={
                        "year": "Tahun",
                        "predicted_enrollment": "Total Prediksi",
                        "classes_needed": "Total Kelas",
                        "total_capacity": "Total Kapasitas",
                        "kode_mk": "Jumlah MK",
                    }
                )
            )

            # groupby sorts its keys, so the first/last rows are the earliest
            # and latest forecast years.
            first_year = yearly_summary.iloc[0]
            last_year = yearly_summary.iloc[-1]
            summary_data = {
                "year": year,
                "semester": semester,
                "semester_name": "Ganjil" if semester == 1 else "Genap",
                "years_ahead": years_ahead,
                "class_capacity": config.class_capacity.DEFAULT_CLASS_CAPACITY,
                "first_year_classes": int(first_year["Total Kelas"]),
                "last_year_classes": int(last_year["Total Kelas"]),
                "growth_classes": int(
                    last_year["Total Kelas"] - first_year["Total Kelas"]
                ),
                "growth_students": int(
                    last_year["Total Prediksi"] - first_year["Total Prediksi"]
                ),
            }

            display_names = {
                "year": "Tahun",
                "kode_mk": "Kode MK",
                "nama_mk": "Nama MK",
                "predicted_enrollment": "Prediksi",
                "classes_needed": "Kelas",
                "total_capacity": "Kapasitas",
            }
            display_df = (
                forecast_df[list(display_names)].copy().rename(columns=display_names)
            )
            display_df["Prediksi"] = display_df["Prediksi"].round(0).astype(int)
            display_df = display_df.sort_values(["Kode MK", "Tahun"])

            return ForecastResult(
                summary_data=summary_data,
                forecast_df=display_df,
                yearly_summary=yearly_summary,
            )
        except Exception as e:
            logger.exception("Error generating forecast: %s", e)
            return self._forecast_error(str(e))


_backend_instance: Optional[PredictionBackend] = None


def get_backend() -> PredictionBackend:
    """Get the singleton backend instance, creating it on first call."""
    global _backend_instance
    if _backend_instance is None:
        _backend_instance = PredictionBackend()
    return _backend_instance