# Streamlit page: exploratory data analysis for the fraud-detection dataset.
#
# Renders univariate charts (category, gender, month, day of week), two
# gender heatmaps, and an interactive bivariate bar chart. All chart/data
# objects come from the project-local ``utils`` helpers.

import streamlit as st

from utils import FraudDetectionAnalysis, BivariateAnalysis, GenderAnalysis


def _render_all_vs_fraud(plot, key, all_title):
    """Draw two side-by-side charts for ``key``: all transactions vs. fraud.

    ``plot`` is the chart factory to call (e.g. ``analysis.plot_univariate``
    or ``analysis.pie_chart``); it is invoked with ``key``, ``field`` and
    ``title`` keyword arguments. ``all_title`` is the title of the left-hand
    (all transactions) chart; the right-hand chart is always titled
    "Fraudulent Transaction".
    """
    all_col, fraud_col = st.columns(2)
    with all_col:
        st.altair_chart(
            plot(key=key, field="Transactions", title=all_title),
            use_container_width=True)
    with fraud_col:
        st.altair_chart(
            plot(key=key, field="Fraud", title="Fraudulent Transaction"),
            use_container_width=True)


st.title('Data Analysis')

analysis = FraudDetectionAnalysis()
bivariate = BivariateAnalysis()
gender_analytics = GenderAnalysis()

# --- Categories -------------------------------------------------------------
st.markdown("#### Categories")
# NOTE(review): this caption names `grocery_net`, while the heatmap caption
# further down names `grocery_pos` -- confirm against the data which is right.
st.caption(
    'It appears that most number of transactions in the dataset fall under '
    'the `gas_transport` category and the least fall under the `travel` '
    'category. When it comes to fraudulent transactions the least is '
    'observed under the `travel` category, while the majority are under '
    'the category `shopping_net` &`grocery_net`.')
_render_all_vs_fraud(
    analysis.plot_univariate, key='Category', all_title="All Transactions")

# --- Gender -----------------------------------------------------------------
st.markdown("#### Gender")
st.caption(
    "We can clearly see that the majority of transactions in the dataset "
    "are made by females.")
st.altair_chart(
    analysis.plot_univariate(
        key='Gender', field="Transactions", title="Count"),
    use_container_width=True)

st.caption(
    'Fraudulent Transaction across different categories | Gender-wise analysis')
st.bar_chart(data=gender_analytics.data['category_gender'], x='category')

# Heatmap: fraud by category and gender.
st.caption(
    "Majority of frauds are observed under the category `shopping_net` & "
    "`grocery_pos`")
st.pyplot(
    fig=gender_analytics.plot_heatmap(
        key='category_gender_heatmap',
        xlabel="Category",
    ),
    use_container_width=True)

# Heatmap: fraud by age group and gender ('age_group_and_gender_heatmap').
st.markdown('''
* Majority of the fraudulent transactions are made by people belonging to the `age group 40 to 60`
* Fraudulent Transactions recorded among the `age-group 80+` could be **identity theft**
''')
st.pyplot(
    fig=gender_analytics.plot_heatmap(
        key='age_group_and_gender_heatmap',
        xlabel="Age Groups in the Dataset",
        rotation=0,
    ),
    use_container_width=True)

# --- Month ------------------------------------------------------------------
# The heading must stay on a single line: a line break after "####" would
# render an empty heading followed by a plain paragraph.
st.markdown("#### Month of an year")
st.caption(
    "Majority of transactions in the dataset are made in December and most "
    "of them occur during the Weekend.")
_render_all_vs_fraud(
    analysis.plot_univariate, key='Month', all_title="All Transactions")

# --- Day of week ------------------------------------------------------------
st.markdown("#### Day of Week")
_render_all_vs_fraud(
    analysis.pie_chart, key='Day of Week', all_title="Transactions")

# --- Bivariate analysis -----------------------------------------------------
st.markdown('#### Bivariate Analysis')
st.markdown("""
* Generally, people within the 40-60 years age group have done more fraudulent transactions
* Transactions made within the distance range 50 - 100 KM are the most in the dataset.
* Most of the Fraudulent transactions are recorded late at night.
""")

with st.spinner("📊...."):
    feature, display = st.columns(2)
    with feature:
        bivariate_feature = st.selectbox(
            label="Bivariate Analysis | Select the feature",
            options=["Age Group", "Transaction Hour", "Distance range in KM"],
        )
    with display:
        y_axis = st.selectbox(
            label="Transaction",
            options=[
                'Fraudulent Transaction',
                'Fair Transaction',
                'Total Transaction',
            ])
    # NOTE(review): kept inside the spinner so it shows while the data for
    # the selected feature is fetched and drawn -- confirm intended nesting.
    st.bar_chart(
        data=bivariate.get_data(bivariate_feature),
        x=bivariate_feature,
        y=y_axis)