# Source: Hugging Face file-viewer export (uploader: kr4phy; commit "Sync from GitHub", cff6ac7; 8.88 kB).
"""
์ข…ํ•ฉ ์ง„๋‹จ ์—”์ง„ ๋ชจ๋“ˆ.
์˜ˆ์ธก(GranitePredictor), XAI(ShapExplainer), ์ ์ˆ˜(SustainabilityScorer) ๋ฅผ
ํ†ตํ•ฉํ•˜์—ฌ ๋‹จ์ผ API ํ˜ธ์ถœ๋กœ ํ•™๊ต ์ „์ฒด ์ง„๋‹จ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
๋˜ํ•œ ์ •์ฑ… ์‹œ๋ฎฌ๋ ˆ์ด์…˜ ๊ธฐ๋Šฅ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค:
- ๊ต์‚ฌ 1์ธ๋‹น ํ•™์ƒ ์ˆ˜ ๊ฐœ์„  ์‹œ๋‚˜๋ฆฌ์˜ค
- ๋ณตํ•ฉ์‹œ์„ค ๋„์ž… ์‹œ๋‚˜๋ฆฌ์˜ค
- ๊ธฐ๊ฐ„์ œ ๊ต์› ์ •๊ทœ์ง ์ „ํ™˜ ์‹œ๋‚˜๋ฆฌ์˜ค
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import Any
import pandas as pd
from src.analytics.predictor import ForecastResult, GranitePredictor
from src.analytics.scorer import SustainabilityScore, SustainabilityScorer
from src.analytics.xai import ShapExplainer, ShapResult
logger = logging.getLogger(__name__)
# ์‹œ๋‚˜๋ฆฌ์˜ค ์ •์˜ (key: ์‹œ๋‚˜๋ฆฌ์˜คID, value: ๋ณ€๊ฒฝ ํ•ญ๋ชฉ)
SIMULATION_SCENARIOS: dict[str, dict[str, Any]] = {
"improve_teacher_ratio": {
"label": "๊ต์‚ฌ 1์ธ๋‹น ํ•™์ƒ ์ˆ˜ ๊ฐœ์„ ",
"description": "๊ธฐ๊ฐ„์ œ ๊ต์›์„ ์ •๊ทœ์ง์œผ๋กœ ์ „ํ™˜ํ•˜๊ณ  ๊ต์›์„ 1๋ช… ์ถ”๊ฐ€ ๋ฐฐ์น˜ํ•ฉ๋‹ˆ๋‹ค.",
"delta": {"teacher_count": 1, "temp_teacher_count": -1},
},
"introduce_complex_facility": {
"label": "๋ณตํ•ฉ์‹œ์„ค ๋„์ž…",
"description": "๋„์„œ๊ด€ยท์ฒด์œก๊ด€ ๋“ฑ ๋ณตํ•ฉ์‹œ์„ค์„ ๋„์ž…ํ•˜์—ฌ ์ง€์—ญ ์œ ์ž… ํšจ๊ณผ๋ฅผ ๋ฐ˜์˜ํ•ฉ๋‹ˆ๋‹ค.",
"delta": {"established_year_delta": 20, "transfer_net_avg": 3},
},
"community_revitalization": {
"label": "์ง€์—ญ ์—ฐ๊ณ„ ๊ฐ•ํ™”",
"description": "์ง€์—ญ ์žฌ์ƒ ๊ฑฐ์ ์œผ๋กœ ์ „ํ™˜ํ•˜์—ฌ ์†Œ๋ฉธ์œ„ํ—˜์ง€์ˆ˜๋ฅผ ๊ฐœ์„ ํ•ฉ๋‹ˆ๋‹ค.",
"delta": {"population_risk_index": 0.3},
},
}
@dataclass
class DiagnosticsResult:
    """Container for the outcome of one comprehensive school diagnosis.

    Holds the school identifiers, the headline status and score, the detailed
    forecast / SHAP / score objects, and optional simulation results and
    metadata.
    """

    schul_code: str
    school_name: str
    status_label: str
    status_code: int
    sustainability_score: float
    forecast: ForecastResult
    shap_result: ShapResult
    score_detail: SustainabilityScore
    # Keyed by scenario id; each value is that scenario's result payload.
    simulations: dict[str, dict[str, Any]] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict representation of this result.

        The nested ``forecast``, ``shap_result`` and ``score_detail`` objects
        are serialized through their own ``to_dict()`` methods, and the
        sustainability score is rounded to two decimals.

        ``simulations`` (outer dict and each scenario payload) and
        ``metadata`` are copied, so callers can add to or edit the returned
        payload without silently modifying this result object.
        """
        return {
            "schul_code": self.schul_code,
            "school_name": self.school_name,
            "status_label": self.status_label,
            "status_code": self.status_code,
            "sustainability_score": round(self.sustainability_score, 2),
            "forecast": self.forecast.to_dict(),
            "shap_result": self.shap_result.to_dict(),
            "score_detail": self.score_detail.to_dict(),
            "simulations": {
                scenario_id: dict(payload)
                for scenario_id, payload in self.simulations.items()
            },
            "metadata": dict(self.metadata),
        }
class DiagnosticsEngine:
    """
    Comprehensive school diagnostics engine.

    Runs the predictor, the SHAP explainer and the sustainability scorer for
    one school and bundles their outputs into a ``DiagnosticsResult``. It can
    also re-score the school under each entry of ``SIMULATION_SCENARIOS``.

    Usage example::

        engine = DiagnosticsEngine()
        result = engine.diagnose(
            schul_code="7431234",
            school_data={...},
            timeseries=pd.Series({2018: 120, ...}),
        )
    """

    def __init__(
        self,
        predictor: GranitePredictor | None = None,
        shap_explainer: ShapExplainer | None = None,
        scorer: SustainabilityScorer | None = None,
    ) -> None:
        """Store the collaborators, building a default for any left as None.

        Parameters
        ----------
        predictor:
            Forecasting component; defaults to ``GranitePredictor()``.
        shap_explainer:
            Explanation component; defaults to ``ShapExplainer()``.
        scorer:
            Scoring component; defaults to ``SustainabilityScorer()``.
        """
        # Test against None explicitly so that an injected collaborator which
        # happens to evaluate as falsy is not silently replaced by a default.
        self._predictor = predictor if predictor is not None else GranitePredictor()
        self._shap = shap_explainer if shap_explainer is not None else ShapExplainer()
        self._scorer = scorer if scorer is not None else SustainabilityScorer()

    def _run_simulation(
        self,
        scenario_id: str,
        base_data: dict[str, Any],
        base_score: SustainabilityScore | None = None,
    ) -> dict[str, Any]:
        """
        Score the school with a single policy scenario applied.

        Parameters
        ----------
        scenario_id:
            A key of ``SIMULATION_SCENARIOS``.
        base_data:
            Original school data dictionary. It is not modified; the scenario
            is applied to a shallow copy.
        base_score:
            Already-computed score for ``base_data``. When omitted, the
            baseline is computed here, as before.

        Returns
        -------
        dict
            Scenario label and description, baseline and simulated total
            scores, their difference, and the simulated sub-scores (all
            rounded to two decimals).

        Raises
        ------
        KeyError
            If ``scenario_id`` is not defined in ``SIMULATION_SCENARIOS``.
        """
        scenario = SIMULATION_SCENARIOS[scenario_id]
        modified = dict(base_data)

        for key, change in scenario["delta"].items():
            if key == "established_year_delta":
                # Simulates reducing facility age by shifting the
                # establishment year forward; skipped when the year is unknown.
                orig_year = int(modified.get("established_year", 0) or 0)
                if orig_year > 0:
                    shifted = orig_year + int(change)
                    this_year = pd.Timestamp.now().year
                    # A facility cannot be newer than "built this year": cap
                    # the shift so the simulation never invents a future year
                    # (but never move a year the source data already had).
                    if shifted > this_year:
                        shifted = max(orig_year, this_year)
                    modified["established_year"] = shifted
                continue

            # Missing or None values count as 0 before the delta is added.
            value = float(modified.get(key) or 0) + float(change)
            if key.endswith("_count"):
                # Head-counts cannot go below zero (e.g. converting a
                # fixed-term teacher at a school that has none).
                value = max(value, 0.0)
            modified[key] = value

        schul_code = base_data.get("sd_schul_code", "")
        if base_score is None:
            base_score = self._scorer.compute(schul_code=schul_code, school_data=base_data)
        sim_score = self._scorer.compute(schul_code=schul_code, school_data=modified)

        return {
            "label": scenario["label"],
            "description": scenario["description"],
            "base_total_score": round(base_score.total_score, 2),
            "sim_total_score": round(sim_score.total_score, 2),
            "delta_score": round(sim_score.total_score - base_score.total_score, 2),
            "sim_curriculum_score": round(sim_score.curriculum_score, 2),
            "sim_personnel_score": round(sim_score.personnel_score, 2),
            "sim_facility_score": round(sim_score.facility_score, 2),
            "sim_community_score": round(sim_score.community_score, 2),
        }

    def diagnose(
        self,
        schul_code: str,
        school_data: dict[str, Any],
        timeseries: pd.Series,
        horizon_years: int = 5,
        run_simulations: bool = True,
    ) -> DiagnosticsResult:
        """
        Run the comprehensive diagnosis for one school.

        Parameters
        ----------
        schul_code:
            SD_SCHUL_CODE of the target school.
        school_data:
            School indicator dictionary (shared input of the scorer and the
            SHAP explainer). It is not modified; an enriched copy is used.
        timeseries:
            Student-count time series, a Series indexed by year (int).
        horizon_years:
            Forecast horizon in years.
        run_simulations:
            Whether to run the policy simulations.

        Returns
        -------
        DiagnosticsResult

        Raises
        ------
        Exception
            Any error from the forecast, SHAP or scoring step is logged with
            its traceback and re-raised unchanged. Failures of individual
            simulations are not raised; they are recorded as
            ``{"error": "..."}`` under that scenario's id.
        """
        # A missing, None or empty name falls back to the school code instead
        # of turning into the literal string "None".
        school_name = str(school_data.get("school_name") or schul_code)
        logger.info("진단 시작: schul_code=%s school_name=%s", schul_code, school_name)

        # 1. Time-series forecast.
        try:
            forecast = self._predictor.predict(
                schul_code=schul_code,
                timeseries=timeseries,
                horizon_years=horizon_years,
                target_col="student_count",
            )
        except Exception as exc:
            logger.exception("예측 실패: schul_code=%s error=%s", schul_code, exc)
            raise

        # Inject the history into a copy of school_data (used for SHAP
        # feature computation) together with the school code.
        school_data_enriched = dict(school_data)
        school_data_enriched["student_count_history"] = forecast.context_values
        school_data_enriched["sd_schul_code"] = schul_code

        # 2. SHAP classification and contribution analysis.
        try:
            shap_result = self._shap.explain(
                schul_code=schul_code, school_data=school_data_enriched
            )
        except Exception as exc:
            logger.exception("SHAP 분석 실패: schul_code=%s error=%s", schul_code, exc)
            raise

        # 3. Sustainability score.
        try:
            score_detail = self._scorer.compute(
                schul_code=schul_code, school_data=school_data_enriched
            )
        except Exception as exc:
            logger.exception("점수 산출 실패: schul_code=%s error=%s", schul_code, exc)
            raise

        # 4. Policy simulations (best effort: one failing scenario must not
        #    abort the diagnosis).
        simulations: dict[str, dict[str, Any]] = {}
        if run_simulations:
            for sid in SIMULATION_SCENARIOS:
                try:
                    # Reuse the baseline score from step 3 rather than
                    # recomputing it for every scenario.
                    simulations[sid] = self._run_simulation(
                        sid, school_data_enriched, base_score=score_detail
                    )
                except Exception as exc:  # noqa: BLE001
                    logger.warning("시뮬레이션 실패: scenario=%s error=%s", sid, exc)
                    simulations[sid] = {"error": str(exc)}

        result = DiagnosticsResult(
            schul_code=schul_code,
            school_name=school_name,
            status_label=shap_result.status_label,
            status_code=shap_result.status_code,
            sustainability_score=score_detail.total_score,
            forecast=forecast,
            shap_result=shap_result,
            score_detail=score_detail,
            simulations=simulations,
            metadata={
                "diagnosed_at": pd.Timestamp.now().isoformat(),
                "model_version": forecast.model_version,
                "data_quality": school_data_enriched.get("data_quality_score", 1.0),
            },
        )
        logger.info(
            "진단 완료: schul_code=%s status=%s score=%.1f",
            schul_code,
            shap_result.status_label,
            score_detail.total_score,
        )
        return result