File size: 3,965 Bytes
89d10a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import streamlit as st
from utils import FraudDetectionAnalysis, BivariateAnalysis, GenderAnalysis


# Page heading, followed by the three helper objects imported from `utils`
# that the sections below use to build their charts and data.
st.title("Data Analysis")
analysis, bivariate, gender_analytics = (
    FraudDetectionAnalysis(),
    BivariateAnalysis(),
    GenderAnalysis(),
)


# --- Categories -------------------------------------------------------------
# Two side-by-side charts for the 'Category' key: all transactions on the
# left, fraudulent transactions on the right.
st.markdown("####  Categories")
# The fraud summary names `grocery_pos`, in agreement with the heatmap caption
# in the Gender section of this page; the text previously said `grocery_net`,
# which contradicted that caption.
st.caption('It appears that most number of transactions in the dataset fall under the `gas_transport` category and the least fall under the `travel` category. When it comes to fraudulent transactions the least is observed under the `travel` category, while the majority are under the category `shopping_net` &`grocery_pos`.')
category, fraud_in_category = st.columns(2)
with category:
    # Left column: chart over the "Transactions" field.
    st.altair_chart(
        analysis.plot_univariate(
            key='Category', field="Transactions", title="All Transactions"),
        use_container_width=True)
with fraud_in_category:
    # Right column: chart over the "Fraud" field.
    st.altair_chart(
        analysis.plot_univariate(
            key='Category', field='Fraud', title="Fraudulent Transaction"),
        use_container_width=True)


# --- Gender -----------------------------------------------------------------
st.markdown("####  Gender")
st.caption(
    "We can clearly see that the majority of transactions in the dataset are made by females.")
# Altair chart returned by plot_univariate for the 'Gender' key.
gender_count_chart = analysis.plot_univariate(
    key="Gender", field="Transactions", title="Count")
st.altair_chart(gender_count_chart, use_container_width=True)

# Bar chart over the 'category_gender' entry of the gender helper's data,
# with the 'category' column on the x-axis.
st.caption(
    "Fraudulent Transaction across different categories | Gender-wise analysis")
category_gender_data = gender_analytics.data["category_gender"]
st.bar_chart(data=category_gender_data, x="category")

# Heatmap for the 'category_gender_heatmap' key.
st.caption(
    "Majority of frauds are observed under the category  `shopping_net` &  `grocery_pos`")
category_heatmap = gender_analytics.plot_heatmap(
    key="category_gender_heatmap", xlabel="Category")
st.pyplot(fig=category_heatmap, use_container_width=True)

# Heatmap for the 'age_group_and_gender_heatmap' key, preceded by the
# observations drawn from it.
st.markdown("""
* Majority of the fraudulent transactions are made by people belonging to the `age group 40 to 60`
* Fraudulent Transactions recorded among the `age-group 80+` could be **identity theft**
""")
age_gender_heatmap = gender_analytics.plot_heatmap(
    key="age_group_and_gender_heatmap",
    xlabel="Age Groups in the Dataset",
    rotation=0,
)
st.pyplot(fig=age_gender_heatmap, use_container_width=True)


# --- Month ------------------------------------------------------------------
# Two side-by-side charts for the 'Month' key: all transactions, then
# fraudulent transactions.
st.markdown("####  Month of an year")
st.caption(
    "Majority of transactions in the dataset are made in December and most of them occur during the Weekend.")
month, fraud_in_month = st.columns(2)
# Each entry: (column to draw in, field to plot, chart title).
month_panels = (
    (month, "Transactions", "All Transactions"),
    (fraud_in_month, "Fraud", "Fraudulent Transaction"),
)
for month_panel, month_field, month_title in month_panels:
    with month_panel:
        st.altair_chart(
            analysis.plot_univariate(
                key="Month", field=month_field, title=month_title),
            use_container_width=True)


# --- Day of Week ------------------------------------------------------------
# Two side-by-side pie charts for the 'Day of Week' key.
st.markdown("####  Day of Week")
day, fraud_in_day = st.columns(2)
with day:
    # Left column: chart over the "Transactions" field.
    all_days_pie = analysis.pie_chart(
        key="Day of Week", field="Transactions", title="Transactions")
    st.altair_chart(all_days_pie, use_container_width=True)
with fraud_in_day:
    # Right column: chart over the "Fraud" field.
    fraud_days_pie = analysis.pie_chart(
        key="Day of Week", field="Fraud", title="Fraudulent Transaction")
    st.altair_chart(fraud_days_pie, use_container_width=True)

# --- Bivariate Analysis -----------------------------------------------------
# Interactive section: one selectbox picks the feature (used as the x column),
# another picks the transaction column plotted on the y-axis.
st.markdown("####  Bivariate Analysis")
st.markdown("""
* Generally, people within the 40-60 years age group have done more fraudulent transactions
* Transactions made within the distance range 50 - 100 KM are the most in the dataset.
* Most of the Fraudulent transactions are recorded late at night.
""")
# Choices offered by the two selectboxes below.
feature_choices = ["Age Group", "Transaction Hour", "Distance range in KM"]
transaction_choices = [
    "Fraudulent Transaction", "Fair Transaction", "Total Transaction"]
with st.spinner("📊...."):
    feature, display = st.columns(2)
    with feature:
        bivariate_feature = st.selectbox(
            label="Bivariate Analysis | Select the feature",
            options=feature_choices,
        )
    with display:
        y_axis = st.selectbox(
            label="Transaction",
            options=transaction_choices,
        )
    # The selected feature label is used both to fetch the data and as the
    # x column of the bar chart.
    selected_data = bivariate.get_data(bivariate_feature)
    st.bar_chart(data=selected_data, x=bivariate_feature, y=y_axis)