from fastapi import APIRouter
import pandas as pd

router = APIRouter()

import os

# Robust data path finding
DATA_PATH = "data/test.csv"
if not os.path.exists(DATA_PATH):
    # Try finding it relative to this file
    current_dir = os.path.dirname(os.path.abspath(__file__))
    parent_dir = os.path.dirname(current_dir)
    DATA_PATH = os.path.join(parent_dir, "data", "test.csv")

if os.path.exists(DATA_PATH):
    df = pd.read_csv(DATA_PATH)
else:
    print(f"Warning: Test data not found at {DATA_PATH}")
    df = pd.DataFrame() # Empty dataframe to prevent crash on import

@router.get("/next/{index}")
def get_flow(index: int):
    try:
        if index >= len(df):
            return {"end": True}

        row = df.iloc[index]
        # Handle NaN values for JSON serialization
        # Use a simpler approach if where() fails
        d = row.to_dict()
        clean_d = {}
        for k, v in d.items():
            if pd.isna(v):
                clean_d[k] = None
            else:
                clean_d[k] = v
        
        return {
            "index": index,
            "flow": clean_d,
            "end": False
        }
    except Exception as e:
        import traceback
        return {"error": str(e), "trace": traceback.format_exc(), "end": True}

# Import necessary modules for prediction
from routers.predict import model, predict_with_model, ATTACK_MAP
import numpy as np

@router.get("/stats")
def get_dashboard_stats():
    try:
        if df.empty:
            return {"error": "No data loaded"}
            
        # Limit to 100,000 rows for stats calculation as requested
        limit = 100000
        stats_df = df.head(limit)
        
        # Perform Prediction on the loaded data (replicating dashboard1.py logic)
        # We need to predict to get the actual current model's view of the data
        # Filter out non-feature columns explicitly
        feature_cols = [col for col in stats_df.columns if col not in ['Attack_type', 'Attack_encode']]
        X = stats_df[feature_cols]
        
        # Predict
        if model:
            preds = predict_with_model(model, X)
            # Convert predictions to labels
            # preds might be floats from XGBoost, cast to int
            pred_labels = [int(p) if isinstance(p, (int, float, np.number)) else int(p) for p in preds]
            pred_names = [ATTACK_MAP.get(p, 'Unknown') for p in pred_labels]
        else:
            # Fallback if model not loaded (shouldn't happen if app started correctly)
            return {"error": "Model not loaded"}

        # 1. Total Flows
        total_flows = len(stats_df)
        
        # 2. Attack Distribution (based on PREDICTIONS)
        attack_counts = {}
        for name in pred_names:
            attack_counts[name] = attack_counts.get(name, 0) + 1
            
        # 3. Protocol Distribution (All)
        if 'Protocol' in stats_df.columns:
            protocol_counts = stats_df['Protocol'].value_counts().head(10).to_dict()
        else:
            protocol_counts = {}
            
        # 4. Protocol Distribution (Malicious)
        malicious_protocol_counts = {}
        recent_threats = []
        
        # Filter malicious based on predictions
        # Create a temporary dataframe with predictions for filtering
        temp_df = stats_df.copy()
        temp_df['Predicted_Attack'] = pred_names
        
        malicious_df = temp_df[temp_df['Predicted_Attack'] != 'Benign']
        
        if not malicious_df.empty:
            if 'Protocol' in malicious_df.columns:
                malicious_protocol_counts = malicious_df['Protocol'].value_counts().head(10).to_dict()
            
            # 5. Recent Threats (Take last 20 malicious flows)
            threats_df = malicious_df.tail(20).iloc[::-1]
            
            for idx, row in threats_df.iterrows():
                recent_threats.append({
                    "id": int(idx),
                    "attack": row['Predicted_Attack'],
                    "protocol": row['Protocol'] if 'Protocol' in row else "Unknown",
                    "severity": "High", # Simplified for summary
                    "fwd_packets": int(row.get('Total Fwd Packets', 0)),
                    "bwd_packets": int(row.get('Total Backward Packets', 0))
                })

        return {
            "total_flows": total_flows,
            "attack_counts": attack_counts,
            "protocol_counts": protocol_counts,
            "malicious_protocol_counts": malicious_protocol_counts,
            "recent_threats": recent_threats
        }
    except Exception as e:
        import traceback
        print(traceback.format_exc())
        return {"error": str(e)}