import os
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
import google.generativeai as genai

# ================================
# CONFIG
# ================================

# Configure the Gemini client once at import time. The key is read from the
# GEMINI_API_KEY environment variable; os.getenv returns None when it is unset.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# File the trained classifier is written to by train_model() and read back
# from by load_model(). The path is relative, so it resolves against the
# current working directory of the process.
MODEL_PATH = "fraud_model.pkl"

# ================================
# DATA GENERATION (SIMULATED)
# ================================

def generate_dataset(n_samples=2000, seed=42):
    """Build a reproducible, purely random transaction dataset.

    Every column, including the ``fraud`` label, is drawn independently, so
    the label carries no real signal; the data only exercises the pipeline.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for the private random generator.

    Returns:
        A ``pandas.DataFrame`` with columns ``amount``, ``old_balance``,
        ``new_balance``, ``transactions_per_day`` and ``fraud`` (0 or 1).
    """
    # A private RandomState keeps NumPy's process-wide RNG untouched. It is
    # the same legacy generator that np.random.seed() drives, so the default
    # arguments reproduce the values of the former global-seed version.
    rng = np.random.RandomState(seed)

    # Draw order matters for reproducibility: keep the columns in this order.
    data = pd.DataFrame({
        "amount": rng.uniform(10, 5000, n_samples),
        "old_balance": rng.uniform(0, 10000, n_samples),
        "new_balance": rng.uniform(0, 10000, n_samples),
        "transactions_per_day": rng.randint(1, 40, n_samples),
        "fraud": rng.randint(0, 2, n_samples),
    })

    return data

# ================================
# MODEL TRAINING
# ================================

def train_model():
    """Fit a random-forest classifier on the simulated data and persist it.

    The dataset from ``generate_dataset()`` is split 80/20 into train and
    test partitions, the hold-out accuracy is printed, and the fitted
    estimator is written to ``MODEL_PATH`` with joblib.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    dataset = generate_dataset()

    # Separate the label column from the predictor columns.
    labels = dataset["fraud"]
    predictors = dataset.drop(columns="fraud")

    predictors_train, predictors_test, labels_train, labels_test = train_test_split(
        predictors, labels, test_size=0.2, random_state=42
    )

    classifier = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
    classifier.fit(predictors_train, labels_train)

    # Report accuracy on the held-out 20 %.
    held_out_predictions = classifier.predict(predictors_test)
    acc = accuracy_score(labels_test, held_out_predictions)
    print(f"Model Accuracy: {acc * 100:.2f}%")

    joblib.dump(classifier, MODEL_PATH)
    return classifier

# ================================
# LOAD OR TRAIN MODEL
# ================================

def load_model():
    """Return the persisted classifier, training a new one if none exists.

    Returns:
        The object stored at ``MODEL_PATH`` when that path exists, otherwise
        the result of ``train_model()`` (which also writes ``MODEL_PATH``).
    """
    if not os.path.exists(MODEL_PATH):
        return train_model()

    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code; MODEL_PATH must only ever point at a trusted file.
    return joblib.load(MODEL_PATH)

# Module-level classifier used by detect_fraud(). This runs at import time,
# so importing the module either reads MODEL_PATH from disk or trains a new
# model and writes it there.
model = load_model()

# ================================
# GEMINI EXPLANATION ENGINE
# ================================

def explain_prediction(features, prediction):
    """Ask Gemini for a written analysis of one scored transaction.

    Args:
        features: Indexable whose positions 0-3 are read as amount, old
            balance, new balance and transactions per day.
        prediction: Classifier label; ``1`` is rendered as "Fraud" in the
            prompt and any other value as "Legitimate".

    Returns:
        The ``text`` attribute of the Gemini response.
    """
    # Named ``llm`` so the handle is not confused with the module-level
    # classifier ``model``.
    llm = genai.GenerativeModel("gemini-2.5-flash")

    prompt = f"""
    You are an AI fraud analyst.

    Transaction Details:
    - Amount: {features[0]}
    - Old Balance: {features[1]}
    - New Balance: {features[2]}
    - Transactions per day: {features[3]}

    Prediction: {"Fraud" if prediction == 1 else "Legitimate"}

    Provide a professional fraud analysis explanation.
    """

    return llm.generate_content(prompt).text
    
# ================================
# PREDICTION PIPELINE
# ================================
def detect_fraud(amount, old_balance, new_balance, transactions_per_day):
    """Score one transaction and attach an explanation when it looks risky.

    Args:
        amount: Transaction amount.
        old_balance: Account balance before the transaction.
        new_balance: Account balance after the transaction.
        transactions_per_day: Number of transactions made that day.

    Returns:
        A tuple ``(pred, fraud_prob, explanation)``: the predicted label
        (``1`` means fraud), the predicted probability of label ``1``, and
        either a Gemini-generated analysis or a fixed "no anomaly" message.

    Raises:
        Whatever ``explain_prediction`` raises when the Gemini call fails;
        the error is deliberately not swallowed here.
    """
    row = np.array([amount, old_balance, new_balance, transactions_per_day])
    features = row.reshape(1, -1)

    # train_model() fits on a DataFrame, so the estimator remembers its column
    # names and scikit-learn warns on every bare-array prediction. Re-label
    # the positional values with the estimator's own names when it has them.
    fitted_names = getattr(model, "feature_names_in_", None)
    if fitted_names is not None and len(fitted_names) == features.shape[1]:
        features = pd.DataFrame(features, columns=list(fitted_names))

    pred = model.predict(features)[0]

    # predict_proba columns follow model.classes_, so look up the column of
    # label 1 instead of assuming it sits at index 1. A model that never saw
    # a fraud example has no such column; report probability zero then.
    class_probs = model.predict_proba(features)[0]
    known_labels = list(model.classes_)
    if 1 in known_labels:
        fraud_prob = class_probs[known_labels.index(1)]
    else:
        fraud_prob = np.float64(0.0)

    # Only explain if it's likely fraud OR the model is very unsure (near 0.5).
    if pred == 1 or (0.4 < fraud_prob < 0.6):
        explanation = explain_prediction(row, pred)
    else:
        explanation = "Transaction processed normally; no anomaly detected."

    return pred, fraud_prob, explanation