classquota / config.py
muhalwan's picture
Revised version
6a0a429
raw
history blame
6.25 kB
import os
from dataclasses import dataclass, field
from typing import Dict, List
# Use the project's data_loader when it can be imported; otherwise expose a
# local stand-in with the same name so the rest of this module still works.
try:
    from data_loader import load_data_file
except ImportError:
    DATA_LOADER_AVAILABLE = False

    def load_data_file() -> str:
        """Fallback used when ``data_loader`` is missing: return a fixed local path."""
        return "data/optimized_data.xlsx"

else:
    DATA_LOADER_AVAILABLE = True
def _get_data_file_path() -> str:
if os.getenv("HF_TOKEN"):
return load_data_file()
elif os.getenv("DEMO_MODE", "false").lower() == "true":
return "data/demo_data.xlsx"
else:
return "data/optimized_data.xlsx"
@dataclass
class DataConfig:
    """Input data location plus the sheet, column and category names used with it."""

    # Resolved each time an instance is created, by calling _get_data_file_path().
    FILE_PATH: str = field(default_factory=_get_data_file_path)

    # Sheet identifiers (presumably worksheet names inside FILE_PATH -- confirm
    # against the loader).
    SHEET_COURSES: str = "tabel1_data_matkul"
    SHEET_OFFERINGS: str = "tabel2_data_matkul_dibuka"
    SHEET_STUDENTS_YEARLY: str = "tabel3_data_mahasiswa_per_tahun"
    SHEET_STUDENTS_INDIVIDUAL: str = "tabel4_data_individu_mahasiswa"

    # Standardization: old-name -> new-name mapping; each instance gets its
    # own dict.
    OFFERINGS_RENAME: Dict[str, str] = field(
        default_factory=lambda: dict(tahun="thn", semester="smt")
    )

    # Category codes; VALID_CATEGORIES is a fresh list per instance.
    ELECTIVE_CATEGORY: str = "P"
    MANDATORY_CATEGORY: str = "W"
    VALID_CATEGORIES: List[str] = field(
        default_factory=lambda: ["P", "W"],
    )
@dataclass
class ClassCapacityConfig:
    """Settings describing class sizes and when classes are opened."""

    # Maximum number of students per class when no override applies.
    DEFAULT_CLASS_CAPACITY: int = 50

    # Fewest students needed before a class is opened.
    MIN_STUDENTS_TO_OPEN_CLASS: int = 1

    # Threshold used when deciding to open additional classes.
    ADDITIONAL_CLASS_THRESHOLD: float = 0.7

    # Open at least one class whenever there is any historical enrollment.
    OPEN_CLASS_IF_HAS_HISTORY: bool = True

    # Per-course capacity overrides: kode_mk -> max_capacity.
    # Empty by default; every instance gets its own dict.
    COURSE_CAPACITY_OVERRIDES: Dict[str, int] = field(default_factory=dict)

    # Warn about capacity once predicted > capacity * this threshold.
    CAPACITY_WARNING_THRESHOLD: float = 0.8

    # Capacity-aware prediction switch: when True, predictions are bounded by
    # realistic capacity constraints.
    ENABLE_CAPACITY_CONSTRAINTS: bool = True
@dataclass
class MultiYearForecastConfig:
    """Settings for forecasts that look several years ahead."""

    # Number of years ahead to forecast.
    FORECAST_YEARS_AHEAD: int = 3

    # Whether trend analysis is included in the output.
    SHOW_TREND_ANALYSIS: bool = True

    # Forecast confidence interval, expressed in the range 0-1.
    CONFIDENCE_INTERVAL: float = 0.95

    # Sanity-check bounds on year-over-year growth.
    MAX_YEARLY_GROWTH_RATE: float = 0.5  # at most 50% growth per year
    MIN_YEARLY_GROWTH_RATE: float = -0.3  # at most 30% decline per year
@dataclass
class ModelConfig:
    """Forecasting-model hyperparameters and prediction safety limits."""

    # --- Prophet hyperparameters ---
    GROWTH_MODE: str = "logistic"
    CHANGEPOINT_SCALE: float = 0.01
    SEASONALITY_MODE: str = "multiplicative"
    YEARLY_SEASONALITY: bool = True
    # NOTE(review): presumably the value used when no model prediction is
    # available -- confirm against the code that reads it.
    FALLBACK_DEFAULT: int = 20

    # --- Prediction safety limits ---
    # A prediction above (historical max enrollment * this multiplier) is
    # flagged as unrealistic.
    SANITY_CHECK_MAX_MULTIPLIER: float = 3.0

    # Fewest historical data points needed for a reliable prediction.
    MIN_HISTORY_POINTS: int = 3

    # Feed the student population to the model as a regressor.
    USE_POPULATION_REGRESSOR: bool = True

    # Use capacity as the upper bound (the cap in logistic growth).
    USE_CAPACITY_AS_CAP: bool = True
@dataclass
class PredictionConfig:
    """Business-logic settings for a prediction run."""

    # Target period of the prediction.
    PREDICT_YEAR: int = 2025
    PREDICT_SEMESTER: int = 2

    # NOTE(review): the exact meaning of the limits below is not visible in
    # this file -- confirm against the code that reads them.
    BUFFER_PERCENT: float = 0.20
    MIN_QUOTA_OPEN: int = 25
    MIN_PREDICT_THRESHOLD: int = 15
    MAX_CAPACITY_MULTIPLIER: float = 2.0
    ABSOLUTE_MAX_STUDENTS: int = 400

    # Semester number -> "MM-DD"-style string; a new dict per instance.
    SEMESTER_TO_MONTH: Dict[int, str] = field(
        default_factory=lambda: {1: "09-01", 2: "03-01"}
    )
@dataclass
class OutputConfig:
    """Output-related settings: target directory, log level and display size."""

    OUTPUT_DIR: str = "output"
    LOG_LEVEL: str = "INFO"
    TOP_N_DISPLAY: int = 30
@dataclass
class BacktestConfig:
    """Year range and minimum-data requirements for backtesting."""

    START_YEAR: int = 2010
    END_YEAR: int = 2024
    VERBOSE: bool = True

    # Backtesting requires at least this many elective enrollments...
    MIN_ELECTIVE_ENROLLMENTS: int = 1

    # ...and at least this many unique courses.
    MIN_UNIQUE_COURSES: int = 1
class Config:
    """Bundle of all configuration sections plus class-capacity helpers.

    Every section is created with its default values and exposed as an
    attribute: ``data``, ``model``, ``prediction``, ``output``, ``backtest``,
    ``class_capacity`` and ``multi_year``.
    """

    def __init__(self):
        self.data: DataConfig = DataConfig()
        self.model: ModelConfig = ModelConfig()
        self.prediction: PredictionConfig = PredictionConfig()
        self.output: OutputConfig = OutputConfig()
        self.backtest: BacktestConfig = BacktestConfig()
        self.class_capacity: ClassCapacityConfig = ClassCapacityConfig()
        self.multi_year: MultiYearForecastConfig = MultiYearForecastConfig()

    def get_prediction_target_name(self) -> str:
        """Return a label for the prediction target, e.g. ``"2025 Semester Genap"``.

        Semester ``1`` is labelled "Ganjil"; every other value is labelled
        "Genap".
        """
        sem = "Ganjil" if self.prediction.PREDICT_SEMESTER == 1 else "Genap"
        return f"{self.prediction.PREDICT_YEAR} Semester {sem}"

    def get_elective_filter_description(self) -> str:
        """Return a human-readable description of the elective-course filter."""
        return f"kategori_mk = '{self.data.ELECTIVE_CATEGORY}' in {self.data.SHEET_COURSES}"

    def get_class_capacity(self, course_code: str) -> int:
        """Return the per-class capacity configured for ``course_code``.

        A course-specific override wins; otherwise the default capacity is
        returned. The value is returned as configured, without validation.
        """
        return self.class_capacity.COURSE_CAPACITY_OVERRIDES.get(
            course_code, self.class_capacity.DEFAULT_CLASS_CAPACITY
        )

    def calculate_classes_needed(
        self,
        predicted_enrollment: float,
        course_code: str,
        has_historical_data: bool = True,
    ) -> int:
        """Return how many classes are needed for ``predicted_enrollment``.

        Args:
            predicted_enrollment: Forecast number of students.
            course_code: Course whose capacity (override or default) applies.
            has_historical_data: Whether the course has past enrollment.
                NOTE(review): with a positive capacity this flag does not
                change the result -- any enrollment in (0, 1) yields one
                class either way. Kept for interface compatibility.

        Returns:
            ``0`` when the prediction is zero or negative, otherwise
            ``ceil(predicted_enrollment / capacity)``, never less than ``1``.

        Raises:
            ValueError: If demand is positive but the configured capacity is
                zero or negative (previously a bare ``ZeroDivisionError`` or
                a silently meaningless result).
        """
        import math

        capacity = self.get_class_capacity(course_code)

        if predicted_enrollment <= 0:
            return 0

        # Fail fast on a misconfigured capacity instead of dividing by zero
        # or producing a negative class count that max() would mask.
        if capacity <= 0:
            raise ValueError(
                f"Class capacity for course {course_code!r} must be positive, "
                f"got {capacity!r}"
            )

        if predicted_enrollment < 1 and has_historical_data:
            return 1

        classes = math.ceil(predicted_enrollment / capacity)
        return max(1, classes)

    def get_capacity_status(self, predicted_enrollment: float, course_code: str) -> str:
        """Classify predicted demand against the seats in the needed classes.

        Returns:
            ``"UNDER"`` when no class is needed (prediction <= 0);
            ``"OVER"`` when utilization is >= 1.0 -- because the class count
            is rounded up, this means the classes are exactly full;
            ``"WARNING"`` when utilization reaches
            ``CAPACITY_WARNING_THRESHOLD``; ``"NORMAL"`` otherwise.

        Raises:
            ValueError: If demand is positive but the configured capacity is
                zero or negative (propagated from
                ``calculate_classes_needed``).
        """
        capacity = self.get_class_capacity(course_code)
        classes_needed = self.calculate_classes_needed(
            predicted_enrollment, course_code
        )
        if classes_needed == 0:
            return "UNDER"

        total_capacity = classes_needed * capacity
        utilization = predicted_enrollment / total_capacity

        if utilization >= 1.0:
            return "OVER"
        if utilization >= self.class_capacity.CAPACITY_WARNING_THRESHOLD:
            return "WARNING"
        return "NORMAL"
# Module-level Config instance, built once at import time with every section
# at its default values.
default_config = Config()