import geopandas as gpd
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from adjustText import adjust_text

# import math
# from datetime import date, time

# import scikitplot as skplot

# from sklearn.preprocessing import MinMaxScaler
# from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier, ExtraTreesClassifier, RandomForestRegressor
# from sklearn.linear_model import LinearRegression, LogisticRegression

# from sklearn.cluster import KMeans
# from sklearn.decomposition import PCA

# from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

# from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score

# from imblearn.over_sampling import RandomOverSampler

from filepath_help import directory


def get_us_state_fraudulent_transaction():
    """Read the CSV registered as ``us_state_fraudulent_transaction``.

    Returns:
        pandas.DataFrame: The parsed contents of the file whose path is
        stored under ``directory['us_state_fraudulent_transaction']``.
    """
    csv_path = directory['us_state_fraudulent_transaction']
    return pd.read_csv(csv_path)


def get_us_geo_date():
    """Read the ``us_geo_data`` file with GeoPandas and drop duplicate rows.

    Returns:
        The frame produced by ``gpd.read_file`` for the path stored under
        ``directory['us_geo_data']``, with exact duplicate rows removed.
    """
    geo_path = directory['us_geo_data']
    return gpd.read_file(geo_path).drop_duplicates()


def get_us_street_fraudulent_transaction():
    """Read the CSV registered as ``us_streets``.

    Returns:
        pandas.DataFrame: The parsed contents of the file whose path is
        stored under ``directory['us_streets']``.
    """
    csv_path = directory['us_streets']
    return pd.read_csv(csv_path)


def get_bivariate_data():
    """Read the CSV registered as ``bivariate_analysis``.

    Returns:
        pandas.DataFrame: The parsed contents of the file whose path is
        stored under ``directory['bivariate_analysis']``.
    """
    csv_path = directory['bivariate_analysis']
    return pd.read_csv(csv_path)


def get_analysis_data():
    """Load the four univariate-analysis CSV files.

    Returns:
        dict[str, pandas.DataFrame]: Frames keyed by the display labels
        ``'Category'``, ``'Gender'``, ``'Month'`` and ``'Day of Week'``,
        each read from the matching entry of ``directory``.
    """
    # (display label, key in ``directory``), in the order they are read.
    sources = (
        ('Category', 'category'),
        ('Gender', 'gender'),
        ('Month', 'month'),
        ('Day of Week', 'day_of_week'),
    )
    return {
        label: pd.read_csv(directory[entry])
        for label, entry in sources
    }


def get_gender_analysis_data():
    """Load the gender-analysis CSV files.

    Returns:
        dict[str, pandas.DataFrame]: One frame per entry, keyed by the same
        name that is used to look its path up in ``directory``.
    """
    entries = (
        'category_gender',
        'category_gender_heatmap',
        'age_group_and_gender_heatmap',
    )
    return {entry: pd.read_csv(directory[entry]) for entry in entries}


def get_age_analysis_data():
    """Read the CSV registered as ``age_amount``.

    Returns:
        pandas.DataFrame: The parsed contents of the file whose path is
        stored under ``directory['age_amount']``.
    """
    return pd.read_csv(directory['age_amount'])


class FraudDetection:
    """Charts of fraudulent transactions by US state and by street.

    The constructor loads three tables through the module-level loaders:

    * ``us_state_fraudulent_transaction`` - read by the methods below through
      its ``state`` and ``is_fraud`` columns.
    * ``us_streets_fraudulent_transaction`` - read through its ``state``,
      ``street`` and ``is_fraud`` columns.
    * ``geo_states`` - geographic frame read through its ``region``,
      ``STUSPS``, ``NAME`` and ``geometry`` columns.
    """

    def __init__(self) -> None:
        self.us_state_fraudulent_transaction = get_us_state_fraudulent_transaction()
        self.us_streets_fraudulent_transaction = get_us_street_fraudulent_transaction()
        self.geo_states = get_us_geo_date()

    def plot_class_imbalance(self, field='CombinedData'):
        """Return an Altair arc chart of ``field`` split by ``isFraud``.

        The ``class_imbalance_stats`` CSV is re-read on every call.

        Args:
            field: Column of that CSV used for the arc size and the title.
        """
        class_imbalance = pd.read_csv(directory['class_imbalance_stats'])
        chart = (
            alt.Chart(data=class_imbalance)
            .mark_arc(cornerRadius=4, padAngle=0.008)
            .encode(color='isFraud', theta=field, tooltip=['isFraud', field])
            .properties(title='Class imbalance in the ' + field)
        )
        return chart

    def plot_us_states_fraudulent_transaction(self,):
        """Return an Altair bar chart of ``is_fraud`` per ``state``."""
        chart = (
            alt.Chart(data=self.us_state_fraudulent_transaction)
            .mark_bar(cornerRadius=5)
            .encode(
                x='state',
                y=alt.Y('is_fraud', title="Fraudulent Transaction"),
            )
            .properties(
                title="Fraudulent Transaction across United States of America")
        )
        return chart

    def plot_geo_data_us_states(self, region):
        """Draw the states of ``region`` on a map and label each one.

        Args:
            region: Value matched against the ``region`` column, or the
                literal ``'All regions'`` to draw every row.

        Returns:
            The Matplotlib figure holding the map.
        """
        if region == 'All regions':
            states = self.geo_states
        else:
            # ``@region`` passes the value itself to pandas instead of
            # splicing it into the expression, so quotes in it are harmless.
            states = self.geo_states.query('region == @region')

        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 30))
        states.plot(ax=ax, color='dodgerblue')
        ax.axis(False)

        texts = []
        for short, name, geo in zip(states.STUSPS, states.NAME, states.geometry):
            x, y = geo.centroid.coords[0]
            # The abbreviation itself is used as the (mathtext) marker glyph.
            ax.scatter(x=x, y=y, marker=f'${short}$', s=400)
            texts.append(ax.text(x=x, y=y, s=name))

        # Nudge the full-name labels apart so they do not overlap.
        adjust_text(
            texts,
            arrowprops={'arrowstyle': '->', 'color': 'k'},
            expand_points=(2, 2),
        )
        return fig

    def plot_selected_states(self, selection):
        """Outline the selected states and annotate each with its fraud count.

        Args:
            selection: Sequence of ``STUSPS`` codes. When empty, the states
                ``MT``, ``NY``, ``CA`` and ``FL`` are used instead.

        Returns:
            The Matplotlib figure holding the outlines and annotations.
        """
        if len(selection) == 0:
            selection = ['MT', 'NY', 'CA', 'FL']
        # ``@selection`` works for any sequence type, not only those whose
        # repr happens to be a valid query literal.
        data = self.geo_states.query("STUSPS in @selection")

        fig, ax = plt.subplots(nrows=1, ncols=1)
        data.boundary.plot(ax=ax)
        ax.axis(False)

        sample = self.us_state_fraudulent_transaction.merge(
            data, left_on='state', right_on='STUSPS')
        for name, count, geo in zip(sample.NAME, sample.is_fraud, sample.geometry):
            ax.annotate(text=f"{name}={count}", xy=geo.centroid.coords[0])
        return fig

    def plot_street_level_fraudulent_transaction(self, state):
        """Return an Altair bar chart of ``is_fraud`` per street for ``state``.

        Args:
            state: Value (or list of values) of the ``state`` column to keep.

        Raises:
            KeyError: If ``state`` is not present in the street table.
        """
        # Selecting with a list label always yields a DataFrame. A bare scalar
        # label yields a Series when exactly one row matches, which Altair
        # cannot chart.
        labels = [state] if isinstance(state, str) else state
        data = self.us_streets_fraudulent_transaction.set_index(
            keys=['state']).loc[labels]

        chart = (
            alt.Chart(data=data)
            .mark_bar(cornerRadius=5)
            .encode(
                x='street',
                y=alt.Y('is_fraud', title=""),
                tooltip=[
                    alt.Tooltip('is_fraud', title="Fraudulent Transaction"),
                    alt.Tooltip('street', title="Street"),
                ],
            )
            .properties(title="Fraudulent Transaction across American Streets")
        )
        return chart


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ANALYSIS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #

class FraudDetectionAnalysis:
    """Altair charts over the frames returned by ``get_analysis_data()``.

    ``self.data`` maps a display label (also used as the column name inside
    the matching frame) to that frame.
    """

    def __init__(self):
        self.data = get_analysis_data()

    def plot_univariate(self, key, field=None, title="Transactions"):
        """Return a horizontal bar chart of ``field`` for each ``key`` value.

        Args:
            key: Entry of ``self.data`` to plot; also the column on the y axis.
            field: Column encoded on the x axis.
            title: Chart title, reused as the x-axis title.
        """
        frame = self.data[key]
        bars = alt.Chart(data=frame, title=title).mark_bar(cornerRadius=25)
        y_axis = alt.Y(key, title=key.capitalize())
        x_axis = alt.X(field, title=title)
        return bars.encode(y=y_axis, x=x_axis)

    def pie_chart(self, key, field=None, title='Transactions'):
        """Return an arc chart of ``field`` coloured by ``key``.

        Args:
            key: Entry of ``self.data`` to plot; also the nominal colour column.
            field: Column encoded (as quantitative) on the arc angle.
            title: Chart title, reused as the angle title.
        """
        frame = self.data[key]
        arcs = alt.Chart(data=frame, title=title).mark_arc()
        angle = alt.Theta("{}:Q".format(field), title=title)
        shade = alt.Color("{}:N".format(key), title=key)
        return arcs.encode(theta=angle, color=shade)


class BivariateAnalysis:
    """Per-category counts of fair and fraudulent transactions.

    ``self.data`` is the frame returned by ``get_bivariate_data()``. It must
    contain a ``"Fraudulent Transaction"`` column whose values ``0`` and ``1``
    are reported as fair and fraudulent respectively.
    """

    def __init__(self) -> None:
        self.data = get_bivariate_data()

    def get_data(self, key):
        """Count fair and fraudulent transactions for every value of ``key``.

        Args:
            key: Name of the column of ``self.data`` to group by.

        Returns:
            pandas.DataFrame: One row per distinct ``key`` value with the
            columns ``key``, ``"Fair Transaction"``,
            ``"Fraudulent Transaction"`` and ``"Total Transaction"``.
            A value with no rows in one class gets a count of ``0`` there
            rather than NaN.
        """
        counts = (
            self.data.groupby([key, "Fraudulent Transaction"])
            .size()
            # Without ``fill_value`` a missing (key, class) pair becomes NaN,
            # which then turns the total into NaN as well.
            .unstack(level=1, fill_value=0)
        )
        counts.columns.name = ''

        sample = counts.reset_index().rename(
            columns={0: "Fair Transaction", 1: "Fraudulent Transaction"})

        # A class that never occurs in the data produces no column at all.
        for label in ("Fair Transaction", "Fraudulent Transaction"):
            if label not in sample.columns:
                sample[label] = 0

        sample['Total Transaction'] = (
            sample['Fair Transaction'] + sample['Fraudulent Transaction'])
        return sample


class GenderAnalysis:
    """Matplotlib heatmaps over the frames from ``get_gender_analysis_data()``."""

    def __init__(self) -> None:
        self.data = get_gender_analysis_data()

    def plot_heatmap(
            self, key,
            index='gender', rotation=80, cmap='Blues',
            xlabel='X axis', ylabel='Gender'
    ):
        """Draw the frame stored under ``key`` as an annotated heatmap.

        Args:
            key: Entry of ``self.data`` to draw.
            index: Column moved into the index; its values label the rows.
            rotation: Rotation of the column tick labels.
            cmap: Matplotlib colormap name for the cells.
            xlabel: Label of the x axis.
            ylabel: Label of the y axis.

        Returns:
            The Matplotlib figure holding the heatmap.
        """
        table = self.data[key].set_index(index)

        fig, ax = plt.subplots(nrows=1, ncols=1)

        # Blue frame and ticks around the matrix.
        ax.tick_params(color='tab:blue', labelcolor='gray', width=2)
        for border in ax.spines.values():
            border.set_edgecolor('tab:blue')
            border.set_linewidth(2)

        n_rows, n_cols = table.shape
        ax.matshow(table.values, cmap=cmap)
        ax.set_xticks(np.arange(n_cols), table.columns, rotation=rotation)
        ax.set_yticks(np.arange(n_rows), table.index)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)

        # Write every cell's value at the centre of that cell.
        for row in np.arange(n_rows):
            for col in np.arange(n_cols):
                ax.text(
                    x=col, y=row, s=f'{table.values[row][col]}',
                    ha='center', va='center')

        return fig


class AgeAnalysis:
    """Matplotlib/Seaborn plots of age, age group and transaction amount.

    ``self.data`` is the frame returned by ``get_age_analysis_data()``. The
    methods below read its ``Age``, ``AgeGroup``, ``Gender``, ``Fraud`` and
    ``Amount`` columns.
    """

    # Selector label -> ``DataFrame.query`` expression on ``Amount``.
    # Shared by every method that filters on amount so that a label can only
    # have one meaning. ``'Less than 100'`` used to filter on ``Amount < 200``.
    _AMOUNT_FILTERS = {
        'Greater than 1000': "Amount > 1000",
        'Less than 1000': "Amount < 1000",
        'Less than 500': "Amount < 500",
        'Less than 300': "Amount < 300",
        'Less than 100': "Amount < 100",
    }

    def __init__(self) -> None:
        self.data = get_age_analysis_data()

    @staticmethod
    def _take_sample(data, size):
        """Return ``size`` random rows, or ``data`` itself if it is not larger.

        ``DataFrame.sample`` raises ``ValueError`` when asked for more rows
        than exist, so every sampling call goes through this guard.
        """
        return data.sample(size) if size < data.shape[0] else data

    def age_group_count_plot(self,):
        """Return a count plot of the ``AgeGroup`` column."""
        with plt.style.context('fivethirtyeight'):
            fig, ax = plt.subplots(nrows=1, ncols=1)
            sns.countplot(data=self.data, x='AgeGroup', ax=ax)
            return fig

    def age_violin_plot(self,):
        """Return a split violin plot of ``Age`` per ``Gender``.

        At most 5000 randomly chosen rows are drawn.
        """
        data = self._take_sample(self.data, 5000)
        with plt.style.context('ggplot'):
            fig, ax = plt.subplots(nrows=1, ncols=1)
            # NOTE(review): ``fontsize`` is not a documented violinplot
            # argument; kept as-is, confirm it is accepted by the installed
            # seaborn version.
            sns.violinplot(
                data=data, x='Age', y='Gender', hue='Gender',
                ax=ax, split=True, scale='count', linewidth=4, fontsize=16)
            return fig

    def age_gender_stats_interactive(self, category: list[str]):
        """Return a split violin plot of ``Age`` per selected ``AgeGroup``.

        Args:
            category: ``AgeGroup`` values to include. When empty, ``"<25"``
                and ``"25-40"`` are used. At most 5000 rows are drawn.
        """
        category = ["<25", "25-40"] if len(category) == 0 else category
        # ``@category`` hands the list to pandas directly instead of relying
        # on its repr being a valid query literal.
        data = self.data.query('AgeGroup in @category')
        data = self._take_sample(data, 5000)
        with plt.style.context('ggplot'):
            fig, ax = plt.subplots(nrows=1, ncols=1)
            sns.violinplot(
                data=data,
                x='Age', y='AgeGroup', hue='Gender', split=True, ax=ax)
            return fig

    def plot_age(
            self, color_encode=False, element='poly',
            sample_size=1000, binrange=None, binwith=None,
            kde=False, fill=True, hatch=''):
        """Return a histogram of ``Age`` over a random sample of the data.

        Args:
            color_encode: Colour the histogram by ``AgeGroup`` when true.
            element: Passed to ``sns.histplot`` as ``element``.
            sample_size: Maximum number of rows drawn; the whole frame is
                used when it has no more rows than this.
            binrange: Passed to ``sns.histplot`` as ``binrange``.
            binwith: Passed to ``sns.histplot`` as ``binwidth`` (the
                parameter keeps its original spelling for existing callers).
            kde: Passed to ``sns.histplot`` as ``kde``.
            fill: Passed to ``sns.histplot`` as ``fill``.
            hatch: Passed to ``sns.histplot`` as ``hatch``.
        """
        data = self._take_sample(self.data, sample_size)

        fig, ax = plt.subplots(nrows=1, ncols=1)
        ax.tick_params(color='tab:blue', labelcolor='gray', width=2)
        for spine in ax.spines.values():
            spine.set_edgecolor('tab:blue')
            spine.set_linewidth(2)

        with plt.style.context('fivethirtyeight'):
            sns.histplot(
                data=data, x='Age', element=element,
                hue='AgeGroup' if color_encode else None,
                binrange=binrange, binwidth=binwith,
                ax=ax, kde=kde, fill=fill, hatch=hatch)

            return fig

    def age_realted_query(self, query):
        """Filter the data by an age range and plot ``Age`` per ``Fraud`` value.

        Args:
            query: One of the labels in the ``search`` table below.

        Returns:
            tuple: The filtered frame and the figure of a ``FacetGrid`` with
            one ``Age`` histogram per ``Fraud`` value.

        Raises:
            KeyError: If ``query`` is not a known label.
        """
        # NOTE(review): every bound is strict, so ages of exactly 25, 50 and
        # 60 fall outside the "Between ..." and "Above ..." ranges - confirm
        # that this is intended.
        search = {
            'Less than 25': "Age<25",
            'Between 25 and 50': "Age > 25 and Age < 50 ",
            'Below 50': "Age < 50",
            'Above 50': "Age > 50",
            'Between 50 and 60': "Age > 50 and Age < 60",
            'Above 60': "Age > 60",
            'Above 80': "Age > 80"
        }
        # Narrow age ranges get narrower bins.
        binwidth = 5 if query in ['Less than 25', 'Between 50 and 60'] else 10
        res = self.data.query(search[query])
        grid = sns.FacetGrid(
            data=res, col='Fraud', sharey=False,
        )
        grid.map_dataframe(func=sns.histplot, x='Age',
                           binwidth=binwidth, hatch='-', ec='white')
        return res, grid.figure

    def KDE_plot_age_group_and_transaction_amount(
            self, query, sample_size=1000, fraud_only=False, age_group="All"):
        """Return a 2-D KDE plot of ``Age`` against ``Amount``.

        Args:
            query: Amount filter label, a key of ``_AMOUNT_FILTERS``.
            sample_size: Maximum number of rows drawn.
            fraud_only: Keep only rows whose ``Fraud`` is ``'Yes'``.
            age_group: ``AgeGroup`` value to keep, or ``'All'`` for no filter.

        Raises:
            KeyError: If ``query`` is not a known label.
        """
        data = self.data.query(self._AMOUNT_FILTERS[query])
        if fraud_only:
            data = data.query("Fraud=='Yes'")
        if age_group != 'All':
            data = data.query("AgeGroup == @age_group")
        data = self._take_sample(data, sample_size)

        with plt.style.context('fivethirtyeight'):
            fig, ax = plt.subplots(nrows=1, ncols=1)
            sns.kdeplot(
                data=data, x='Age', y='Amount', hue='AgeGroup', fill=True, ax=ax)
            ax.grid(color='white')
        return fig

    def compare_transactions_across_age_group(
        self, sample_size=1000, lowerbound=0, upperbound=100
    ):
        """Return a 2-D histogram of ``Amount`` against ``AgeGroup``.

        Args:
            sample_size: Maximum number of rows drawn.
            lowerbound: Smallest ``Amount`` kept (inclusive).
            upperbound: Largest ``Amount`` kept (inclusive).
        """
        data = self.data.query(
            "Amount >= @lowerbound and Amount <= @upperbound")
        data = self._take_sample(data, sample_size)
        with plt.style.context('fivethirtyeight'):
            fig, ax = plt.subplots(nrows=1, ncols=1)
            sns.histplot(
                data=data, x='Amount',
                y='AgeGroup', hue='AgeGroup', ax=ax)
            ax.grid(color='white', linewidth=2)
        return fig

    def transaction_amount_study(self, query: str = 'Less than 500', age_group: str = 'All'):
        """Return a polygon histogram of ``Amount`` coloured by ``AgeGroup``.

        Args:
            query: Amount filter label, a key of ``_AMOUNT_FILTERS``.
            age_group: ``AgeGroup`` value to keep, or ``'All'`` for no filter.

        Raises:
            KeyError: If ``query`` is not a known label.
        """
        data = self.data.query(self._AMOUNT_FILTERS[query])
        if age_group != 'All':
            data = data.query("AgeGroup == @age_group")

        with plt.style.context('fivethirtyeight'):
            fig, ax = plt.subplots(nrows=1, ncols=1)
            sns.histplot(
                data=data, x='Amount',
                hue='AgeGroup', element='poly', ax=ax)
        return fig