| import pandas as pd |
| import numpy as np |
| import gradio as gr |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| from datetime import datetime |
| from sklearn.metrics import confusion_matrix, precision_score, recall_score |
| from PIL import Image |
| import requests |
| from io import BytesIO |
|
|
| |
# Download the logo shown at the top of the dashboard.
# NOTE(review): this address is a google.com/url redirect whose target is a
# facebook.com page, so it most likely returns HTML rather than image bytes --
# replace it with a direct image URL.
logo_url = "https://www.google.com/url?sa=i&url=https%3A%2F%2Fwww.facebook.com%2Fsabpaisa%2F&psig=AOvVaw1tJqk9DKF8WgvbGj1H004X&ust=1742646042537000&source=images&cd=vfe&opi=89978449&ved=0CBEQjRxqFwoTCPjmiLiUm4wDFQAAAAAdAAAAABAE"
try:
    # A timeout keeps a slow or unreachable host from hanging start-up forever.
    response = requests.get(logo_url, timeout=10)
    response.raise_for_status()
    logo = Image.open(BytesIO(response.content))
    # Image.open is lazy; force decoding now so a corrupt payload fails here
    # instead of later, when the UI tries to render it.
    logo.load()
except (requests.RequestException, OSError) as exc:
    # The logo is purely decorative: report the problem and start without it.
    # PIL signals unreadable image data with an OSError subclass.
    print(f"Could not load logo from {logo_url!r}: {exc}")
    logo = None
|
|
| |
# Column names of the demo dataset; every tuple in _SAMPLE_ROWS follows this order.
_SAMPLE_COLUMNS = (
    'transaction_amount',
    'transaction_date',
    'transaction_channel',
    'is_fraud',
    'transaction_payment_mode_anonymous',
    'payment_gateway_bank_anonymous',
    'payer_browser_anonymous',
    'transaction_id_anonymous',
    'payee_id_anonymous',
)

# One tuple per transaction.
_SAMPLE_ROWS = (
    (2500, '01-11-2024 16:08', 'mobile', 1, 10, 6, 1833, 'ANON_9629', 'ANON_47'),
    (799, '01-11-2024 16:15', 'mobile', 1, 10, 6, 1833, 'ANON_9764', 'ANON_47'),
    (9338, '02-11-2024 14:43', 'mobile', 1, 2, 6, 2766, 'ANON_27514', 'ANON_265'),
    (11749, '03-11-2024 11:14', 'mobile', 1, 6, 58, 3378, 'ANON_41176', 'ANON_8'),
    (8999, '04-11-2024 12:54', 'mobile', 1, 2, 6, 2766, 'ANON_66597', 'ANON_265'),
    (1500, '06-11-2024 08:36', 'W', 1, 10, 6, 3212, 'ANON_134329', 'ANON_12'),
    (3000, '06-11-2024 08:56', 'W', 1, 10, 6, 3212, 'ANON_134618', 'ANON_12'),
    (4000, '06-11-2024 09:08', 'W', 1, 10, 6, 3212, 'ANON_134815', 'ANON_12'),
    (300, '06-11-2024 09:29', 'W', 1, 10, 6, 3212, 'ANON_135218', 'ANON_12'),
    (5000, '06-11-2024 13:05', 'W', 1, 10, 6, 3212, 'ANON_147464', 'ANON_12'),
    (24990, '06-11-2024 15:12', 'mobile', 1, 2, 6, 2721, 'ANON_155578', 'ANON_265'),
)

# Transpose the rows back into the column-oriented mapping the DataFrame is built from.
data = {
    column: list(values)
    for column, values in zip(_SAMPLE_COLUMNS, zip(*_SAMPLE_ROWS))
}

df = pd.DataFrame(data)

# Dates are written day-first ("DD-MM-YYYY HH:MM"); parse them with an explicit format.
df['transaction_date'] = pd.to_datetime(df['transaction_date'], format='%d-%m-%Y %H:%M')

# Randomly generated 0/1 flags for the "predicted" and "reported" columns.
# The fixed seed and the draw order (predicted first, then reported) keep the
# generated values reproducible.
np.random.seed(42)
for _flag_column, _class_probs in (
    ('is_fraud_predicted', [0.3, 0.7]),
    ('is_fraud_reported', [0.4, 0.6]),
):
    df[_flag_column] = np.random.choice([0, 1], size=len(df), p=_class_probs)
|
|
def filter_data(start_date, end_date, payer_id, payee_id, transaction_id, source_df=None):
    """Return the transactions that match the date window and ID filters.

    Args:
        start_date: Inclusive lower bound, anything ``pd.to_datetime`` accepts.
            ``None`` or a blank string means "no lower bound".
        end_date: Upper bound. A value without a time of day (e.g.
            ``"2024-11-06"``) covers that whole day; a value with a time is an
            inclusive cut-off. ``None`` or a blank string means "no upper bound".
        payer_id: Optional exact-match filter; skipped when empty.
        payee_id: Optional exact match on ``payee_id_anonymous``; skipped when empty.
        transaction_id: Optional exact match on ``transaction_id_anonymous``;
            skipped when empty.
        source_df: Frame to filter. Defaults to the module-level ``df``.

    Returns:
        A new DataFrame holding the matching rows; the source frame is not modified.

    Raises:
        ValueError: If a non-blank date cannot be parsed by ``pd.to_datetime``.
    """

    def _parse_bound(value):
        # Blank input means "unbounded". Parsing "" would give NaT, and every
        # comparison against NaT is False, which would silently empty the result.
        if value is None or (isinstance(value, str) and not value.strip()):
            return None
        return pd.to_datetime(value)

    frame = df if source_df is None else source_df
    keep = pd.Series(True, index=frame.index)

    lower = _parse_bound(start_date)
    if lower is not None:
        keep &= frame['transaction_date'] >= lower

    upper = _parse_bound(end_date)
    if upper is not None:
        if upper == upper.normalize():
            # A date-only bound parses to midnight. Comparing with "<=" would
            # drop every transaction that happened later on that same day, so
            # extend the window to the start of the following day instead.
            keep &= frame['transaction_date'] < upper + pd.Timedelta(days=1)
        else:
            keep &= frame['transaction_date'] <= upper

    # NOTE(review): payer_id is matched against transaction_id_anonymous, exactly
    # as before; the sample dataset has no payer-id column -- confirm which
    # column was intended.
    id_filters = (
        ('transaction_id_anonymous', payer_id),
        ('payee_id_anonymous', payee_id),
        ('transaction_id_anonymous', transaction_id),
    )
    for column, wanted in id_filters:
        if isinstance(wanted, str):
            # Ignore stray whitespace around typed or pasted IDs.
            wanted = wanted.strip()
        if wanted:
            keep &= frame[column] == wanted

    return frame[keep].copy()
|
|
def create_comparison_chart(dimension, filtered_df):
    """Build a bar chart comparing predicted and reported fraud counts per group.

    Args:
        dimension: Display name of the grouping dimension: 'Transaction Channel',
            'Transaction Payment Mode', 'Payment Gateway Bank', 'Payer ID' or
            'Payee ID'.
        filtered_df: Transactions to summarise; must contain the grouping column
            plus ``is_fraud_predicted`` and ``is_fraud_reported``.

    Returns:
        A matplotlib ``Figure``. It is blank when ``filtered_df`` is empty or
        ``dimension`` is not recognised.
    """
    # NOTE(review): 'Payer ID' groups by transaction_id_anonymous, exactly as
    # before -- confirm which column was intended.
    group_columns = {
        'Transaction Channel': 'transaction_channel',
        'Transaction Payment Mode': 'transaction_payment_mode_anonymous',
        'Payment Gateway Bank': 'payment_gateway_bank_anonymous',
        'Payer ID': 'transaction_id_anonymous',
        'Payee ID': 'payee_id_anonymous',
    }
    group_col = group_columns.get(dimension)

    if filtered_df.empty or group_col is None:
        blank = plt.figure()
        plt.close(blank)
        return blank

    counts = (
        filtered_df
        .groupby(group_col)[['is_fraud_predicted', 'is_fraud_reported']]
        .sum()
        .rename(columns={
            'is_fraud_predicted': 'Predicted Fraud',
            'is_fraud_reported': 'Reported Fraud',
        })
    )

    # Draw on an explicit Axes: without ax=, DataFrame.plot opens a second
    # figure and leaves the first one empty.
    fig, ax = plt.subplots(figsize=(10, 6))
    counts.plot(kind='bar', ax=ax)
    ax.set_title(f'Fraud Comparison by {dimension}')
    ax.set_ylabel('Count')
    ax.set_xlabel(dimension)
    fig.tight_layout()

    # Return the Figure itself rather than the pyplot module: the module only
    # refers to whichever figure is current, so several plot outputs would all
    # show the last chart drawn. Closing drops the figure from pyplot's registry
    # so figures do not pile up on every call; the object can still be rendered.
    plt.close(fig)
    return fig
|
|
def create_time_series(filtered_df, granularity):
    """Build a line chart of predicted and reported fraud counts over time.

    Args:
        filtered_df: Transactions to summarise; must contain a datetime
            ``transaction_date`` column plus ``is_fraud_predicted`` and
            ``is_fraud_reported``.
        granularity: Bucket size: 'Day', 'Hour' or 'Minute'.

    Returns:
        A matplotlib ``Figure``. It is blank when ``filtered_df`` is empty or
        ``granularity`` is not recognised.
    """
    bucket_makers = {
        'Day': lambda stamps: stamps.dt.date,
        'Hour': lambda stamps: stamps.dt.strftime('%Y-%m-%d %H'),
        'Minute': lambda stamps: stamps.dt.strftime('%Y-%m-%d %H:%M'),
    }
    make_buckets = bucket_makers.get(granularity)

    if filtered_df.empty or make_buckets is None:
        blank = plt.figure()
        plt.close(blank)
        return blank

    time_group = make_buckets(filtered_df['transaction_date'])
    counts = filtered_df.groupby(time_group)[['is_fraud_predicted', 'is_fraud_reported']].sum()

    fig, ax = plt.subplots(figsize=(12, 6))
    # Markers keep a single time bucket visible; a bare line through one point
    # draws nothing.
    ax.plot(counts.index, counts['is_fraud_predicted'].values, 'bo-', label='Predicted Fraud')
    ax.plot(counts.index, counts['is_fraud_reported'].values, 'ro-', label='Reported Fraud')
    ax.set_title('Fraud Trend Over Time')
    ax.set_ylabel('Count')
    ax.set_xlabel('Time')
    ax.legend()
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()

    # Return the Figure itself rather than the pyplot module: the module only
    # refers to whichever figure is current, so several plot outputs would all
    # show the last chart drawn. Closing drops the figure from pyplot's registry
    # so figures do not pile up on every call; the object can still be rendered.
    plt.close(fig)
    return fig
|
|
def calculate_metrics(filtered_df):
    """Compute the confusion matrix plot, precision and recall of the predictions.

    Args:
        filtered_df: Transactions to evaluate; must contain ``is_fraud`` (actual
            label) and ``is_fraud_predicted`` (predicted label).

    Returns:
        A tuple ``(figure, precision, recall)``. ``figure`` is a matplotlib
        ``Figure`` with the confusion-matrix heatmap. For an empty frame the
        tuple is ``(None, 0, 0)``.
    """
    if filtered_df.empty:
        return None, 0, 0

    actual = filtered_df['is_fraud']
    predicted = filtered_df['is_fraud_predicted']

    # Pin both classes so the matrix is always 2x2. Without labels=, a selection
    # containing a single class yields a 1x1 matrix that no longer lines up with
    # the two tick labels below.
    cm = confusion_matrix(actual, predicted, labels=[0, 1])

    precision = precision_score(actual, predicted, zero_division=0)
    recall = recall_score(actual, predicted, zero_division=0)

    class_names = ['Not Fraud', 'Fraud']
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax,
    )
    ax.set_ylabel('Actual')
    ax.set_xlabel('Predicted')
    ax.set_title('Confusion Matrix')

    # Return the Figure itself rather than the pyplot module: the module only
    # refers to whichever figure is current, so several plot outputs would all
    # show the last chart drawn. Closing drops the figure from pyplot's registry
    # so figures do not pile up on every call; the object can still be rendered.
    plt.close(fig)
    return fig, precision, recall
|
|
def update_interface(start_date, end_date, payer_id, payee_id, transaction_id, dimension, time_granularity):
    """Recompute every dashboard output for the current filter settings.

    Args:
        start_date: Lower bound of the date window, passed to ``filter_data``.
        end_date: Upper bound of the date window, passed to ``filter_data``.
        payer_id: Optional payer filter, passed to ``filter_data``.
        payee_id: Optional payee filter, passed to ``filter_data``.
        transaction_id: Optional transaction filter, passed to ``filter_data``.
        dimension: Grouping dimension for the comparison chart.
        time_granularity: Bucket size for the time-series chart.

    Returns:
        A 6-tuple in the order of the dashboard outputs: the filtered
        transactions as a DataFrame (dates formatted as text), the comparison
        chart, the time-series chart, the confusion-matrix plot (``None`` when
        nothing matched), the precision text and the recall text.
    """
    filtered_df = filter_data(start_date, end_date, payer_id, payee_id, transaction_id)

    comparison_chart = create_comparison_chart(dimension, filtered_df)
    time_series = create_time_series(filtered_df, time_granularity)
    confusion_matrix_plot, precision, recall = calculate_metrics(filtered_df)

    # Format the timestamps on a new frame so the filtered data keeps real datetimes.
    display_df = filtered_df.assign(
        transaction_date=filtered_df['transaction_date'].dt.strftime('%Y-%m-%d %H:%M')
    )

    # Hand the table component the DataFrame itself. A list of per-row dicts
    # (to_dict('records')) carries no column headers and is not one of the
    # tabular shapes the component renders properly.
    return (
        display_df,
        comparison_chart,
        time_series,
        confusion_matrix_plot,
        f"Precision: {precision:.4f}",
        f"Recall: {recall:.4f}",
    )
|
|
# Dashboard layout: filter inputs on top, then the data table, the two charts,
# and the model-evaluation section. Nothing is computed until the button is clicked.
with gr.Blocks() as demo:
    gr.Markdown("# Fraud Transaction Analysis Dashboard")

    gr.Image(value=logo, show_label=False, width=200)

    # --- Filters -----------------------------------------------------------
    with gr.Row():
        with gr.Column():
            start_date = gr.Textbox(value="2024-11-01", label="Start Date (YYYY-MM-DD)")
            end_date = gr.Textbox(value="2024-11-06", label="End Date (YYYY-MM-DD)")

        with gr.Column():
            payer_id = gr.Textbox(label="Payer ID")
            payee_id = gr.Textbox(label="Payee ID")
            transaction_id = gr.Textbox(label="Transaction ID")

    # --- Chart options -------------------------------------------------------
    with gr.Row():
        dimension = gr.Dropdown(
            choices=[
                "Transaction Channel",
                "Transaction Payment Mode",
                "Payment Gateway Bank",
                "Payer ID",
                "Payee ID",
            ],
            value="Transaction Channel",
            label="Comparison Dimension",
        )
        time_granularity = gr.Dropdown(
            choices=["Day", "Hour", "Minute"],
            value="Day",
            label="Time Granularity",
        )

    update_button = gr.Button(value="Update Dashboard")

    # --- Filtered transactions ----------------------------------------------
    with gr.Row():
        gr.Markdown("## Transaction Data")

    data_table = gr.DataFrame()

    # --- Charts ----------------------------------------------------------------
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Fraud Comparison by Dimension")
            comparison_plot = gr.Plot()

        with gr.Column():
            gr.Markdown("## Fraud Trend Over Time")
            time_series_plot = gr.Plot()

    # --- Model evaluation ------------------------------------------------------
    with gr.Row():
        gr.Markdown("## Model Evaluation")

    with gr.Row():
        with gr.Column():
            confusion_matrix_plot = gr.Plot()

        with gr.Column():
            precision_text = gr.Textbox(label="Precision")
            recall_text = gr.Textbox(label="Recall")

    # Input and output order must match update_interface's parameters and
    # return tuple.
    update_button.click(
        fn=update_interface,
        inputs=[
            start_date,
            end_date,
            payer_id,
            payee_id,
            transaction_id,
            dimension,
            time_granularity,
        ],
        outputs=[
            data_table,
            comparison_plot,
            time_series_plot,
            confusion_matrix_plot,
            precision_text,
            recall_text,
        ],
    )


if __name__ == "__main__":
    demo.launch()
|
|