transaction-analytics / pages /1_Analysis.py
Ajay-user's picture
Upload 28 files
89d10a2
import streamlit as st
from utils import FraudDetectionAnalysis, BivariateAnalysis, GenderAnalysis
# Page heading; emitted before any of the analysis helpers are constructed.
st.title("Data Analysis")

# Helper objects imported from the project's ``utils`` module. They are
# created once here and reused by every section further down the page.
analysis, bivariate, gender_analytics = (
    FraudDetectionAnalysis(),
    BivariateAnalysis(),
    GenderAnalysis(),
)
# --- Categories --------------------------------------------------------------
# Two charts side by side: all transactions per category on the left and
# fraudulent transactions per category on the right.
st.markdown("#### Categories")
# The caption names `grocery_pos` (previously `grocery_net`) so that it agrees
# with the heatmap caption later on this page, which reports the same finding
# as `shopping_net` & `grocery_pos`. A missing space after "&" is fixed too.
st.caption(
    'It appears that most number of transactions in the dataset fall under '
    'the `gas_transport` category and the least fall under the `travel` '
    'category. When it comes to fraudulent transactions the least is '
    'observed under the `travel` category, while the majority are under the '
    'category `shopping_net` & `grocery_pos`.')
category, fraud_in_category = st.columns(2)
with category:
    st.altair_chart(
        analysis.plot_univariate(
            key='Category', field="Transactions", title="All Transactions"),
        use_container_width=True)
with fraud_in_category:
    st.altair_chart(
        analysis.plot_univariate(
            key='Category', field='Fraud', title="Fraudulent Transaction"),
        use_container_width=True)
# --- Gender ------------------------------------------------------------------
# First the transaction count per gender, then a bar chart of the
# 'category_gender' table keyed on its 'category' column.
st.markdown("#### Gender")
st.caption("We can clearly see that the majority of transactions in the dataset are made by females.")
gender_count_chart = analysis.plot_univariate(
    key='Gender',
    field="Transactions",
    title="Count",
)
st.altair_chart(gender_count_chart, use_container_width=True)

st.caption('Fraudulent Transaction across different categories | Gender-wise analysis')
category_gender_table = gender_analytics.data['category_gender']
st.bar_chart(data=category_gender_table, x='category')
# Heatmap for the 'category_gender_heatmap' entry, with the x-axis labelled
# "Category". The figure is built first and then handed to Streamlit.
st.caption("Majority of frauds are observed under the category `shopping_net` & `grocery_pos`")
category_heatmap_fig = gender_analytics.plot_heatmap(
    key='category_gender_heatmap',
    xlabel="Category",
)
st.pyplot(fig=category_heatmap_fig, use_container_width=True)
# Heatmap for the 'age_group_and_gender_heatmap' entry, preceded by a short
# bullet list of observations. `rotation=0` is forwarded to the plot helper.
age_group_notes = '''
* Majority of the fraudulent transactions are made by people belonging to the `age group 40 to 60`
* Fraudulent Transactions recorded among the `age-group 80+` could be **identity theft**
'''
st.markdown(age_group_notes)
age_group_heatmap_fig = gender_analytics.plot_heatmap(
    key='age_group_and_gender_heatmap',
    xlabel="Age Groups in the Dataset",
    rotation=0,
)
st.pyplot(fig=age_group_heatmap_fig, use_container_width=True)
# --- Month -------------------------------------------------------------------
# Two columns: all transactions per month (left) and fraud per month (right).
st.markdown("#### Month of an year")
st.caption("Majority of transactions in the dataset are made in December and most of them occur during the Weekend.")
month, fraud_in_month = st.columns(2)

# (column, data field, chart title) for each panel, in left-to-right order.
month_panels = (
    (month, "Transactions", "All Transactions"),
    (fraud_in_month, "Fraud", "Fraudulent Transaction"),
)
for month_panel, month_field, month_title in month_panels:
    with month_panel:
        month_chart = analysis.plot_univariate(
            key='Month', field=month_field, title=month_title)
        st.altair_chart(month_chart, use_container_width=True)
# --- Day of Week -------------------------------------------------------------
# Two pie charts: all transactions per weekday (left) and fraud only (right).
st.markdown("#### Day of Week")
day, fraud_in_day = st.columns(2)

# (column, data field, chart title) for each panel, in left-to-right order.
day_panels = (
    (day, "Transactions", "Transactions"),
    (fraud_in_day, "Fraud", "Fraudulent Transaction"),
)
for day_panel, day_field, day_title in day_panels:
    with day_panel:
        day_chart = analysis.pie_chart(
            key='Day of Week', field=day_field, title=day_title)
        st.altair_chart(day_chart, use_container_width=True)
# --- Bivariate Analysis ------------------------------------------------------
# The user picks a feature (x-axis) and a transaction measure (y-axis); the
# matching table from the bivariate helper is then drawn as a bar chart.
st.markdown('#### Bivariate Analysis')
st.markdown("""
* Generally, people within the 40-60 years age group have done more fraudulent transactions
* Transactions made within the distance range 50 - 100 KM are the most in the dataset.
* Most of the Fraudulent transactions are recorded late at night.
""")
# The spinner label previously held mis-decoded bytes of the bar-chart emoji
# (U+1F4CA). A named escape is used so the label no longer depends on how the
# source file itself is encoded or decoded.
with st.spinner("\N{BAR CHART}...."):
    feature, display = st.columns(2)
    with feature:
        bivariate_feature = st.selectbox(
            label="Bivariate Analysis | Select the feature",
            options=["Age Group", "Transaction Hour", "Distance range in KM"]
        )
    with display:
        y_axis = st.selectbox(
            label="Transaction",
            options=['Fraudulent Transaction', 'Fair Transaction', 'Total Transaction'])
    # The selected feature is both the lookup key for the data and the name
    # of the x column; the selected measure is the y column.
    st.bar_chart(data=bivariate.get_data(bivariate_feature),
                 x=bivariate_feature, y=y_axis)