import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from catboost import CatBoostRegressor
import os

def predict_runoff_triangle(file_path):
    """
    Processes a run-off triangle file (CSV or Excel), trains a CatBoost model to predict missing claims,
    and saves the completed run-off triangle to a new CSV file.

    Parameters:
    - file_path (str): Path to the input file (CSV or Excel).

    Returns:
    - output_file (str): Path to the output CSV file with the completed run-off triangle.
    """

    # Read file (CSV or Excel)
    df = pd.read_csv(file_path) if file_path.endswith(".csv") else pd.read_excel(file_path)
    df.columns = df.columns.str.strip()

    # Validate required column
    if "Accident Year" not in df.columns:
        print("Error: 'Accident Year' column is missing in the uploaded file.")
        return None

    # Reshape data for modeling
    df_long = df.melt(id_vars=["Accident Year"], var_name="Development Year", value_name="Paid Claims")
    df_long["Development Year"] = pd.to_numeric(df_long["Development Year"], errors='coerce')

    # Split data into training and prediction sets
    train_data = df_long.dropna(subset=["Paid Claims"])
    predict_data = df_long[df_long["Paid Claims"].isna()]

    X = train_data[["Accident Year", "Development Year"]]
    y = train_data["Paid Claims"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train CatBoost model
    model = CatBoostRegressor(iterations=300, learning_rate=0.03, depth=4, 
                              loss_function='RMSE', verbose=100, l2_leaf_reg=3)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=50, verbose=100)

    # Evaluate model
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    mae_train = mean_absolute_error(y_train, y_train_pred)
    mae_test = mean_absolute_error(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)

    print(f"Model Train MAE: {mae_train:.2f}, R²: {r2_train:.2f}")
    print(f"Model Test MAE: {mae_test:.2f}, R²: {r2_test:.2f}")

    # Predict missing claims
    X_predict = predict_data[["Accident Year", "Development Year"]]
    predicted_claims = model.predict(X_predict)

    df_long.loc[df_long["Paid Claims"].isna(), "Paid Claims"] = predicted_claims

    # Reshape back to triangle format
    completed_df = df_long.pivot(index="Accident Year", columns="Development Year", values="Paid Claims")

    # Create output file path
    base, ext = os.path.splitext(file_path)
    output_file = f"{base}_prediction.csv"

    # Save predictions
    completed_df.to_csv(output_file)
    print(f"Prediction complete! Results saved to {output_file}")

    # Learning Curve Plot
    train_sizes = np.linspace(0.1, 1.0, 10)
    train_errors, test_errors = [], []

    for train_size in train_sizes:
        subset_size = int(len(X_train) * train_size)
        X_subset, y_subset = X_train[:subset_size], y_train[:subset_size]

        model.fit(X_subset, y_subset, verbose=0)
        train_pred = model.predict(X_subset)
        test_pred = model.predict(X_test)

        train_errors.append(mean_absolute_error(y_subset, train_pred))
        test_errors.append(mean_absolute_error(y_test, test_pred))

  

    return output_file




def generate_runoff_triangle(file_path, output_file="runoff_triangle.csv"):
    try:
        # Load the .xlsm file
        df = pd.read_excel(file_path, parse_dates=["Date Survenance"], engine="openpyxl")

        # Strip spaces and fix column names
        df.columns = df.columns.str.strip()
        df.rename(columns={"Exercice": "Year of Settlement", "Règlement": "Settlement Amount"}, inplace=True)

        # Convert "Settlement Amount" to numeric (handling commas)
        df["Settlement Amount"] = pd.to_numeric(df["Settlement Amount"].astype(str).str.replace(",", ""), errors="coerce")

        # Extract Year of Occurrence (Accident Year)
        df["Accident Year"] = df["Date Survenance"].dt.year

        # Compute Development Year
        df["Development Year"] = df["Year of Settlement"] - df["Accident Year"] 

        # Aggregate settlement amounts per Accident Year & Development Year
        triangle_data = df.groupby(["Accident Year", "Development Year"])["Settlement Amount"].sum().reset_index()

        # Pivot to create the run-off triangle (Development Table)
        triangle = triangle_data.pivot(index="Accident Year", columns="Development Year", values="Settlement Amount")

        # Ensure cumulative values across development years
        triangle = triangle.cumsum(axis=1)

        # Save the run-off triangle to a CSV file
        triangle.to_csv(output_file, index=True)

        print(f"Run-off triangle saved to {output_file}")
        return output_file
    except Exception as e:
        print(f"Error: {e}")
        return None

# Example usage:
# file_path = "Base de Données MATHURANCE.xlsm"
# generate_runoff_triangle(file_path)



def fill_runoff_triangle_csv(file):
    """
    Reads a run-off triangle from a CSV file, fills in missing values using development factors,
    and writes the filled triangle to a new CSV file.

    Parameters:
    - file (str): Path to the input CSV file.

    Returns:
    - output_file (str): Path to the output CSV file containing the filled run-off triangle.
    """

    # Read the CSV file
    triangle_df = pd.read_csv(file)

    # Preserve the "Accident Year" column and get triangle values for processing
    triangle_values = triangle_df.drop(columns=["Accident Year"]).copy()

    # Calculate development factors for each adjacent pair of columns
    development_factors = []
    for col in range(triangle_values.shape[1] - 1):
        current_col = triangle_values.iloc[:, col]
        next_col = triangle_values.iloc[:, col + 1]
        valid_indices = (~current_col.isna()) & (~next_col.isna())
        factors = next_col[valid_indices] / current_col[valid_indices]
        development_factors.append(factors.mean())

    # Fill in missing values by working backwards in the triangle
    for row in reversed(range(triangle_values.shape[0])):
        for col in reversed(range(1, triangle_values.shape[1])):
            if pd.isna(triangle_values.iloc[row, col - 1]) and not pd.isna(triangle_values.iloc[row, col]):
                triangle_values.iloc[row, col - 1] = triangle_values.iloc[row, col] / development_factors[col - 1]

    # Combine the Accident Year column with the filled triangle values
    filled_triangle_df = pd.concat([triangle_df[["Accident Year"]], triangle_values], axis=1)

    # Create an output file name by appending "_filled" before the file extension
    base, ext = os.path.splitext(file)
    output_file = f"{base}_filled.csv"

    # Write the filled DataFrame to the new CSV file
    filled_triangle_df.to_csv(output_file, index=False)

    print(f"Filled run-off triangle saved to: {output_file}")
    return output_file


css = """
.container { max-width: 900px; margin: auto; padding: 20px;  }
.header { text-align: center; margin-bottom: 40px; }
.instructions { background: #f0f0f0; padding: 20px; border-radius: 10px; margin-bottom: 30px; }
.tab-buttons { margin-bottom: 20px;}
.hide-label label {display: none !important;}
"""

with gr.Blocks(theme=gr.themes.Base(), css=css) as app:
    gr.Image("Untitled_design__2___1_-removebg-preview.png",elem_classes="hide-label label")
    with gr.Tabs():
        with gr.Tab("Generate Loss Triangle"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## Development Loss Triangle Generator")
                with gr.Row():
                    file_input = gr.File(label="Upload Claims Data")
                    submit_btn = gr.Button("Process File", variant="primary")
                file_output = gr.File(label="Download Triangle")
                submit_btn.click(generate_runoff_triangle, file_input, file_output)

        with gr.Tab("Estimate Run-Off"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## Run-Off Triangle Estimator")
                with gr.Row():
                    estimate_input = gr.File(label="Upload Run-Off Triangle")
                    estimate_btn = gr.Button("Estimate", variant="primary")
                estimate_output = gr.File(label="Download Filled Triangle")
                estimate_btn.click(fill_runoff_triangle_csv, estimate_input, estimate_output)
        with gr.Tab("predict the newer losses"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## predict the newer losses")
                with gr.Row():
                    predict_input = gr.File(label="Upload the estimated data")
                    predict_btn = gr.Button("Predict", variant="primary")
                predict_output = gr.File(label="Download the predicted data")
                predict_btn.click(predict_runoff_triangle, predict_input, predict_output)
         

app.launch()