"""Central configuration for the course-enrollment prediction pipeline.

Groups settings into small dataclasses (data source, class capacity,
multi-year forecasting, model, prediction, output, backtest) and exposes
them through a single ``Config`` aggregate plus a ready-made
``default_config`` instance.
"""

import math
import os
from dataclasses import dataclass, field
from typing import Dict, List

try:
    from data_loader import load_data_file

    DATA_LOADER_AVAILABLE = True
except ImportError:
    DATA_LOADER_AVAILABLE = False

    def load_data_file() -> str:
        """Fallback used when ``data_loader`` cannot be imported."""
        return "data/optimized_data.xlsx"


def _get_data_file_path() -> str:
    """Pick the data file path based on environment variables.

    Order of precedence:
      1. ``HF_TOKEN`` is set (non-empty): delegate to ``load_data_file()``.
      2. ``DEMO_MODE`` equals ``"true"`` (case-insensitive): demo workbook.
      3. Otherwise: the default optimized workbook.
    """
    if os.getenv("HF_TOKEN"):
        return load_data_file()
    if os.getenv("DEMO_MODE", "false").lower() == "true":
        return "data/demo_data.xlsx"
    return "data/optimized_data.xlsx"


@dataclass
class DataConfig:
    """Location of the input workbook and names used to read it."""

    # Resolved once per instance from the environment (see
    # _get_data_file_path).
    FILE_PATH: str = field(default_factory=_get_data_file_path)

    # Sheet names inside the workbook.
    SHEET_COURSES: str = "tabel1_data_matkul"
    SHEET_OFFERINGS: str = "tabel2_data_matkul_dibuka"
    SHEET_STUDENTS_YEARLY: str = "tabel3_data_mahasiswa_per_tahun"
    SHEET_STUDENTS_INDIVIDUAL: str = "tabel4_data_individu_mahasiswa"

    # Standardization
    OFFERINGS_RENAME: Dict[str, str] = field(
        default_factory=lambda: {"tahun": "thn", "semester": "smt"}
    )

    # Course category codes.
    ELECTIVE_CATEGORY: str = "P"
    MANDATORY_CATEGORY: str = "W"
    VALID_CATEGORIES: List[str] = field(default_factory=lambda: ["P", "W"])


@dataclass
class ClassCapacityConfig:
    """Settings describing class sizes and when to open classes."""

    # Default maximum students per class
    DEFAULT_CLASS_CAPACITY: int = 50

    # Minimum students required to open a class
    MIN_STUDENTS_TO_OPEN_CLASS: int = 1

    # Threshold for opening additional classes
    ADDITIONAL_CLASS_THRESHOLD: float = 0.7

    # Always open at least 1 class if there's any historical enrollment
    OPEN_CLASS_IF_HAS_HISTORY: bool = True

    # Course-specific capacity overrides (kode_mk -> max_capacity)
    COURSE_CAPACITY_OVERRIDES: Dict[str, int] = field(default_factory=dict)

    # Warning threshold - if predicted > capacity * threshold, warn about
    # capacity
    CAPACITY_WARNING_THRESHOLD: float = 0.8

    # Enable capacity-aware prediction
    # When True, predictions will be bounded by realistic capacity
    # constraints
    ENABLE_CAPACITY_CONSTRAINTS: bool = True


@dataclass
class MultiYearForecastConfig:
    """Settings for forecasting several years ahead."""

    # How many years ahead to forecast
    FORECAST_YEARS_AHEAD: int = 3

    # Include trend analysis in output
    SHOW_TREND_ANALYSIS: bool = True

    # Confidence interval for forecasts (0-1)
    CONFIDENCE_INTERVAL: float = 0.95

    # Growth rate limits for sanity checking
    MAX_YEARLY_GROWTH_RATE: float = 0.5  # 50% max growth per year
    MIN_YEARLY_GROWTH_RATE: float = -0.3  # 30% max decline per year


@dataclass
class ModelConfig:
    """Forecasting-model hyperparameters and prediction safety limits."""

    # Prophet Hyperparameters
    GROWTH_MODE: str = "logistic"
    CHANGEPOINT_SCALE: float = 0.01
    SEASONALITY_MODE: str = "multiplicative"
    YEARLY_SEASONALITY: bool = True
    FALLBACK_DEFAULT: int = 20

    # Prediction safety limits
    # Maximum multiplier of historical max enrollment before flagging as
    # unrealistic
    SANITY_CHECK_MAX_MULTIPLIER: float = 3.0

    # Minimum historical data points required for reliable prediction
    MIN_HISTORY_POINTS: int = 3

    # Use student population as regressor
    USE_POPULATION_REGRESSOR: bool = True

    # Use capacity as upper bound (cap in logistic growth)
    USE_CAPACITY_AS_CAP: bool = True


@dataclass
class PredictionConfig:
    """Business logic for predictions."""

    # Target period of the prediction.
    PREDICT_YEAR: int = 2025
    PREDICT_SEMESTER: int = 2

    BUFFER_PERCENT: float = 0.20
    MIN_QUOTA_OPEN: int = 25
    MIN_PREDICT_THRESHOLD: int = 15
    MAX_CAPACITY_MULTIPLIER: float = 2.0
    ABSOLUTE_MAX_STUDENTS: int = 400

    # Semester number -> "MM-DD" string.
    SEMESTER_TO_MONTH: Dict[int, str] = field(
        default_factory=lambda: {
            1: "09-01",
            2: "03-01",
        }
    )


@dataclass
class OutputConfig:
    """Output directory, log level and display size."""

    OUTPUT_DIR: str = "output"
    LOG_LEVEL: str = "INFO"
    TOP_N_DISPLAY: int = 30


@dataclass
class BacktestConfig:
    """Year range and minimum-data requirements for backtesting."""

    START_YEAR: int = 2010
    END_YEAR: int = 2024
    VERBOSE: bool = True

    # Minimum elective enrollments required for backtesting
    MIN_ELECTIVE_ENROLLMENTS: int = 1

    # Minimum unique courses required for backtesting
    MIN_UNIQUE_COURSES: int = 1


class Config:
    """Aggregate of all configuration sections plus capacity helpers."""

    def __init__(self):
        """Create every section with its default values."""
        self.data: DataConfig = DataConfig()
        self.model: ModelConfig = ModelConfig()
        self.prediction: PredictionConfig = PredictionConfig()
        self.output: OutputConfig = OutputConfig()
        self.backtest: BacktestConfig = BacktestConfig()
        self.class_capacity: ClassCapacityConfig = ClassCapacityConfig()
        self.multi_year: MultiYearForecastConfig = MultiYearForecastConfig()

    def get_prediction_target_name(self) -> str:
        """Return a label such as ``"2025 Semester Genap"``.

        Semester 1 is labelled "Ganjil"; every other value is labelled
        "Genap".
        """
        sem = "Ganjil" if self.prediction.PREDICT_SEMESTER == 1 else "Genap"
        return f"{self.prediction.PREDICT_YEAR} Semester {sem}"

    def get_elective_filter_description(self) -> str:
        """Describe the filter that selects elective courses."""
        return (
            f"kategori_mk = '{self.data.ELECTIVE_CATEGORY}' "
            f"in {self.data.SHEET_COURSES}"
        )

    def get_class_capacity(self, course_code: str) -> int:
        """Return the per-class capacity for ``course_code``.

        A course-specific override wins; otherwise the default capacity
        is used. The value is returned as configured, without validation.
        """
        return self.class_capacity.COURSE_CAPACITY_OVERRIDES.get(
            course_code, self.class_capacity.DEFAULT_CLASS_CAPACITY
        )

    def calculate_classes_needed(
        self,
        predicted_enrollment: float,
        course_code: str,
        has_historical_data: bool = True,
    ) -> int:
        """Return how many classes are needed for a predicted enrollment.

        Args:
            predicted_enrollment: Predicted number of students.
            course_code: Course whose capacity (override or default) applies.
            has_historical_data: Whether the course has past enrollment.
                With a positive capacity this does not change the result,
                because any positive prediction yields at least one class.

        Returns:
            0 when the prediction is not positive, otherwise
            ``ceil(predicted_enrollment / capacity)`` with a minimum of 1.

        Raises:
            ValueError: If the prediction is positive and the configured
                capacity for the course is zero or negative.
        """
        if predicted_enrollment <= 0:
            return 0

        capacity = self.get_class_capacity(course_code)
        # A non-positive capacity would divide by zero or produce a
        # meaningless negative class count; fail with a clear message.
        if capacity <= 0:
            raise ValueError(
                f"Class capacity for course {course_code!r} must be "
                f"positive, got {capacity}"
            )

        if predicted_enrollment < 1 and has_historical_data:
            return 1

        classes = math.ceil(predicted_enrollment / capacity)
        return max(1, classes)

    def get_capacity_status(
        self, predicted_enrollment: float, course_code: str
    ) -> str:
        """Classify predicted utilization of the classes that would open.

        Returns:
            "UNDER" when no class is needed; "OVER" when utilization of
            the opened classes is at least 1.0; "WARNING" when it reaches
            ``CAPACITY_WARNING_THRESHOLD``; otherwise "NORMAL".

        Raises:
            ValueError: Propagated from ``calculate_classes_needed`` when
                the configured capacity is not positive.
        """
        classes_needed = self.calculate_classes_needed(
            predicted_enrollment, course_code
        )
        if classes_needed == 0:
            return "UNDER"

        capacity = self.get_class_capacity(course_code)
        total_capacity = classes_needed * capacity
        utilization = predicted_enrollment / total_capacity

        if utilization >= 1.0:
            return "OVER"
        if utilization >= self.class_capacity.CAPACITY_WARNING_THRESHOLD:
            return "WARNING"
        return "NORMAL"


default_config = Config()