import pickle
import json
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Dict, List, Optional, Union
import warnings

warnings.filterwarnings('ignore')


class EarlyWarningPredictor:
    """자영업 조기경보 예측 모델"""

    def __init__(self, model_path: Optional[str] = None):
        self.model_path = Path(model_path) if model_path else Path(__file__).parent.parent / 'model'
        self.xgb_model = None
        self.lgb_model = None
        self.catboost_model = None
        self.label_encoders = {}
        self.feature_names = []
        self.config = {}
        self.is_loaded = False

    @classmethod
    def from_pretrained(cls, model_name_or_path: str):
        predictor = cls(model_path=model_name_or_path)
        predictor.load_model()
        return predictor

    def load_model(self):
        """모델 및 설정 로드"""
        if not self.model_path.exists():
            raise FileNotFoundError(f"Model directory not found: {self.model_path}")

        # XGBoost 로드
        xgb_path = self.model_path / 'xgboost_model.pkl'
        if xgb_path.exists():
            with open(xgb_path, 'rb') as f:
                self.xgb_model = pickle.load(f)

        # LightGBM 로드
        lgb_path = self.model_path / 'lightgbm_model.pkl'
        if lgb_path.exists():
            with open(lgb_path, 'rb') as f:
                self.lgb_model = pickle.load(f)

        # CatBoost 로드
        catboost_path = self.model_path / 'catboost_model.pkl'
        if catboost_path.exists():
            with open(catboost_path, 'rb') as f:
                self.catboost_model = pickle.load(f)

        # Label Encoders 로드
        le_path = self.model_path / 'label_encoders.pkl'
        if le_path.exists():
            with open(le_path, 'rb') as f:
                self.label_encoders = pickle.load(f)

        # Feature names 로드
        fn_path = self.model_path / 'feature_names.json'
        if fn_path.exists():
            with open(fn_path, 'r', encoding='utf-8') as f:
                self.feature_names = json.load(f)

        # Config 로드
        config_path = self.model_path / 'config.json'
        if config_path.exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                self.config = json.load(f)

        self.is_loaded = True
        print(f"모델 로드 완료: v{self.config.get('model_version', '2.0')}")

    def predict(self, store_data: Dict,
                monthly_usage: Optional[pd.DataFrame] = None,
                monthly_customers: Optional[pd.DataFrame] = None,
                threshold: Optional[float] = None) -> Dict:
        if not self.is_loaded:
            self.load_model()

        # 특징 생성
        from src.feature_engineering import FeatureEngineer
        engineer = FeatureEngineer()

        if monthly_usage is None or monthly_customers is None:
            # 간단한 데이터 형식
            features = self._create_simple_features(store_data)
        else:
            # 전체 특징 생성
            features = engineer.create_features(store_data, monthly_usage, monthly_customers)

        # 특징 정렬 및 결측치 처리
        features = self._align_features(features)

        # 예측
        threshold = threshold or self.config.get('threshold', 0.5)

        if self.xgb_model and self.lgb_model:
            # 앙상블 예측
            xgb_prob = self.xgb_model.predict_proba(features)[0][1]
            lgb_prob = self.lgb_model.predict_proba(features)[0][1]

            weights = self.config.get('ensemble_weights', [0.5, 0.5])
            closure_probability = weights[0] * xgb_prob + weights[1] * lgb_prob

            if self.catboost_model and len(weights) > 2:
                cat_prob = self.catboost_model.predict_proba(features)[0][1]
                closure_probability = (weights[0] * xgb_prob +
                                       weights[1] * lgb_prob +
                                       weights[2] * cat_prob)
        else:
            closure_probability = 0.5

        # 위험도 점수(0-100)
        risk_score = closure_probability * 100

        # 위험 등급
        if risk_score < 30:
            risk_level = '낮음'
            risk_color = 'green'
        elif risk_score < 60:
            risk_level = '보통'
            risk_color = 'yellow'
        else:
            risk_level = '높음'
            risk_color = 'red'

        # 예측 결과
        result = {
            'risk_score': round(risk_score, 2),
            'risk_level': risk_level,
            'risk_color': risk_color,
            'closure_probability': round(closure_probability, 4),
            'is_at_risk': closure_probability > threshold,
            'threshold': threshold,
            'confidence': max(closure_probability, 1 - closure_probability),
            'model_version': self.config.get('model_version', '2.0')
        }

        # 위험 요인 분석(특징 중요도 기반)
        if self.xgb_model:
            result['risk_factors'] = self._analyze_risk_factors(features)

        # 액션 아이템
        result['action_items'] = self._generate_action_items(result, store_data)

        return result

    def predict_batch(self, stores_df: pd.DataFrame) -> pd.DataFrame:
        results = []

        for idx, row in stores_df.iterrows():
            store_data = row.to_dict()
            result = self.predict(store_data)
            result['store_id'] = row.get('store_id', idx)
            results.append(result)

        return pd.DataFrame(results)

    def explain(self, store_data: Dict, top_n: int = 10) -> Dict:
        # SHAP 분석(간단한 버전)
        result = self.predict(store_data)

        explanation = {
            'prediction': result,
            'top_features': result.get('risk_factors', {}),
            'interpretation': self._interpret_prediction(result)
        }

        return explanation

    def _create_simple_features(self, store_data: Dict) -> pd.DataFrame:
        """간단한 특징 생성"""
        # 기본 특징만 사용
        features = {
            'sales_avg_all': store_data.get('avg_sales', 50),
            'customer_reuse_rate': store_data.get('reuse_rate', 25),
            'operation_months': store_data.get('operating_months', 12),
            'trend_slope': store_data.get('sales_trend', 0),
        }

        # 나머지 특징은 기본값으로
        for fname in self.feature_names:
            if fname not in features:
                features[fname] = 0

        return pd.DataFrame([features])

    def _align_features(self, features: pd.DataFrame) -> pd.DataFrame:
        """특징 정렬 및 전처리"""
        # 모델 학습 시 사용한 특징 순서로 정렬
        aligned = pd.DataFrame()

        for fname in self.feature_names:
            if fname in features.columns:
                aligned[fname] = features[fname]
            else:
                aligned[fname] = 0

        # 결측치 처리
        aligned = aligned.fillna(aligned.median().fillna(0))

        return aligned

    def _analyze_risk_factors(self, features: pd.DataFrame) -> Dict[str, float]:
        """위험 요인 분석"""
        # 특징 중요도 기반
        if not hasattr(self.xgb_model, 'feature_importances_'):
            return {}

        importance = self.xgb_model.feature_importances_
        feature_values = features.iloc[0].values

        # 중요도와 값을 곱해서 기여도 계산
        contributions = {}

        for i, fname in enumerate(self.feature_names):
            if importance[i] > 0.01:  # 중요한 특징만
                score = importance[i] * abs(feature_values[i]) * 10

                # 특징명을 한글로 변환
                readable_name = self._translate_feature_name(fname)
                contributions[readable_name] = min(round(score, 1), 100)

        # 상위 6개만 반환
        sorted_factors = sorted(contributions.items(), key=lambda x: x[1], reverse=True)[:6]

        return dict(sorted_factors)

    def _translate_feature_name(self, fname: str) -> str:
        """특징명을 읽기 쉬운 형태로 변환"""
        translations = {
            'sales_avg': '매출',
            'trend_slope': '매출 추세',
            'trend_consecutive_down': '연속 하락',
            'customer_reuse_rate': '재이용률',
            'volatility_cv': '매출 변동성',
            'operation_months': '영업 기간',
            'sales_recent_vs_previous': '최근 매출 변화'
        }

        for key, value in translations.items():
            if key in fname:
                return value

        return fname

    def _generate_action_items(self, result: Dict, store_data: Dict) -> List[str]:
        """액션 아이템 생성"""
        actions = []

        risk_score = result['risk_score']

        if risk_score > 70:
            actions.append("즉시 조치 필요: 비용 절감 및 매출 증대 방안 마련")
            actions.append("현금흐름 개선: 외상 매출 회수 및 재고 최적화")
            actions.append("전문가 상담: 경영 컨설팅 및 구조조정 검토")
        elif risk_score > 40:
            actions.append("매출 분석: 주력 상품/서비스 재점검")
            actions.append("마케팅 강화: 신규 고객 유치 캠페인")
            actions.append("차별화 전략: 경쟁력 있는 요소 발굴 및 강화")
        else:
            actions.append("현재 상태 유지: 정기적인 모니터링 지속")
            actions.append("성장 기회 탐색: 추가 매출원 발굴")
            actions.append("고객 충성도 강화: 멤버십 프로그램 등")

        return actions

    def _interpret_prediction(self, result: Dict) -> str:
        """예측 결과 해석"""
        risk_level = result['risk_level']
        risk_score = result['risk_score']

        if risk_level == '높음':
            return f"위험도가 매우 높습니다 ({risk_score:.1f}점). 폐업 위험이 높으므로 즉각적인 대응이 필요합니다."
        elif risk_level == '보통':
            return f"주의가 필요합니다 ({risk_score:.1f}점). 개선 방안을 마련하여 위험을 줄이세요."
        else:
            return f"안정적입니다 ({risk_score:.1f}점). 현재의 운영 방식을 유지하면서 지속적으로 모니터링하세요."

    def get_model_info(self) -> Dict:
        """모델 정보 반환"""
        return {
            'version': self.config.get('model_version', '2.0'),
            'n_features': self.config.get('n_features', 0),
            'performance': self.config.get('performance', {}),
            'ensemble_weights': self.config.get('ensemble_weights', []),
            'models': {
                'xgboost': self.xgb_model is not None,
                'lightgbm': self.lgb_model is not None,
                'catboost': self.catboost_model is not None
            }
        }


if __name__ == "__main__":
    # 사용 예시
    print("=" * 70)
    print("Early Warning Predictor v2.0 테스트")
    print("=" * 70)

    # 모델 로드
    predictor = EarlyWarningPredictor(model_path='../model')

    try:
        predictor.load_model()

        # 테스트 데이터
        store_data = {
            'store_id': 'TEST_001',
            'industry': '카페',
            'location': '서울 강남구',
            'avg_sales': 45,
            'reuse_rate': 22.5,
            'operating_months': 18,
            'sales_trend': -0.05
        }

        # 예측
        result = predictor.predict(store_data)

        print("\n예측 결과:")
        print(f"  위험도 점수: {result['risk_score']}/100")
        print(f"  위험 등급: {result['risk_level']}")
        print(f"  폐업 확률: {result['closure_probability']:.1%}")

        if 'risk_factors' in result:
            print("\n주요 위험 요인:")
            for factor, score in result['risk_factors'].items():
                print(f"    - {factor}: {score:.1f}점")

        print("\n액션 아이템:")
        for action in result['action_items']:
            print(f"    {action}")

    except FileNotFoundError:
        print("모델 파일이 없습니다. 먼저 모델을 학습해주세요.")