Abs6187 committed on
Commit
086b24e
·
verified ·
1 Parent(s): 5581f4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -23
app.py CHANGED
@@ -6,8 +6,7 @@ import seaborn as sns
6
  from datetime import datetime
7
  from sklearn.metrics import confusion_matrix, precision_score, recall_score
8
 
9
- # Sample data preparation (in a real scenario, you would load your data)
10
- # Converting your sample data to a DataFrame
11
  data = {
12
  'transaction_amount': [2500, 799, 9338, 11749, 8999, 1500, 3000, 4000, 300, 5000, 24990],
13
  'transaction_date': ['01-11-2024 16:08', '01-11-2024 16:15', '02-11-2024 14:43', '03-11-2024 11:14',
@@ -27,11 +26,8 @@ data = {
27
 
28
  df = pd.DataFrame(data)
29
 
30
- # Convert date strings to datetime objects
31
  df['transaction_date'] = pd.to_datetime(df['transaction_date'], format='%d-%m-%Y %H:%M')
32
 
33
- # Add simulated predicted fraud and reported fraud columns
34
- # In a real scenario, these would come from your model and reports
35
  np.random.seed(42)
36
  df['is_fraud_predicted'] = np.random.choice([0, 1], size=len(df), p=[0.3, 0.7])
37
  df['is_fraud_reported'] = np.random.choice([0, 1], size=len(df), p=[0.4, 0.6])
@@ -39,11 +35,9 @@ df['is_fraud_reported'] = np.random.choice([0, 1], size=len(df), p=[0.4, 0.6])
39
  def filter_data(start_date, end_date, payer_id, payee_id, transaction_id):
40
  filtered_df = df.copy()
41
 
42
- # Convert string dates to datetime for comparison
43
  start_date = pd.to_datetime(start_date)
44
  end_date = pd.to_datetime(end_date)
45
 
46
- # Apply filters
47
  filtered_df = filtered_df[(filtered_df['transaction_date'] >= start_date) &
48
  (filtered_df['transaction_date'] <= end_date)]
49
 
@@ -77,17 +71,14 @@ def create_comparison_chart(dimension, filtered_df):
77
  else:
78
  return plt.figure()
79
 
80
- # Group by the selected dimension and count predicted and reported frauds
81
  predicted = filtered_df.groupby(group_col)['is_fraud_predicted'].sum()
82
  reported = filtered_df.groupby(group_col)['is_fraud_reported'].sum()
83
 
84
- # Create a DataFrame for plotting
85
  plot_df = pd.DataFrame({
86
  'Predicted Fraud': predicted,
87
  'Reported Fraud': reported
88
  })
89
 
90
- # Plot
91
  plot_df.plot(kind='bar', figsize=(10, 6))
92
  plt.title(f'Fraud Comparison by {dimension}')
93
  plt.ylabel('Count')
@@ -102,7 +93,6 @@ def create_time_series(filtered_df, granularity):
102
 
103
  plt.figure(figsize=(12, 6))
104
 
105
- # Set the time grouping based on granularity
106
  if granularity == 'Day':
107
  time_group = filtered_df['transaction_date'].dt.date
108
  elif granularity == 'Hour':
@@ -112,11 +102,9 @@ def create_time_series(filtered_df, granularity):
112
  else:
113
  return plt.figure()
114
 
115
- # Group by time and count predicted and reported frauds
116
  predicted = filtered_df.groupby(time_group)['is_fraud_predicted'].sum()
117
  reported = filtered_df.groupby(time_group)['is_fraud_reported'].sum()
118
 
119
- # Plot
120
  plt.plot(predicted.index, predicted.values, 'b-', label='Predicted Fraud')
121
  plt.plot(reported.index, reported.values, 'r-', label='Reported Fraud')
122
  plt.title('Fraud Trend Over Time')
@@ -132,14 +120,11 @@ def calculate_metrics(filtered_df):
132
  if filtered_df.empty:
133
  return None, 0, 0
134
 
135
- # Calculate confusion matrix
136
  cm = confusion_matrix(filtered_df['is_fraud'], filtered_df['is_fraud_predicted'])
137
 
138
- # Calculate precision and recall
139
  precision = precision_score(filtered_df['is_fraud'], filtered_df['is_fraud_predicted'], zero_division=0)
140
  recall = recall_score(filtered_df['is_fraud'], filtered_df['is_fraud_predicted'], zero_division=0)
141
 
142
- # Create confusion matrix plot
143
  plt.figure(figsize=(6, 5))
144
  sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
145
  xticklabels=['Not Fraud', 'Fraud'],
@@ -151,19 +136,14 @@ def calculate_metrics(filtered_df):
151
  return plt, precision, recall
152
 
153
  def update_interface(start_date, end_date, payer_id, payee_id, transaction_id, dimension, time_granularity):
154
- # Filter data based on inputs
155
  filtered_df = filter_data(start_date, end_date, payer_id, payee_id, transaction_id)
156
 
157
- # Create comparison chart
158
  comparison_chart = create_comparison_chart(dimension, filtered_df)
159
 
160
- # Create time series chart
161
  time_series = create_time_series(filtered_df, time_granularity)
162
 
163
- # Calculate evaluation metrics
164
  confusion_matrix_plot, precision, recall = calculate_metrics(filtered_df)
165
 
166
- # Format the filtered dataframe for display
167
  display_df = filtered_df.copy()
168
  display_df['transaction_date'] = display_df['transaction_date'].dt.strftime('%Y-%m-%d %H:%M')
169
 
@@ -174,7 +154,6 @@ def update_interface(start_date, end_date, payer_id, payee_id, transaction_id, d
174
  f"Precision: {precision:.4f}",
175
  f"Recall: {recall:.4f}")
176
 
177
- # Define the Gradio interface
178
  with gr.Blocks() as demo:
179
  gr.Markdown("# Fraud Transaction Analysis Dashboard")
180
 
@@ -233,6 +212,5 @@ with gr.Blocks() as demo:
233
  outputs=[data_table, comparison_plot, time_series_plot, confusion_matrix_plot, precision_text, recall_text]
234
  )
235
 
236
- # Launch the app
237
  if __name__ == "__main__":
238
  demo.launch()
 
6
  from datetime import datetime
7
  from sklearn.metrics import confusion_matrix, precision_score, recall_score
8
 
9
+ # Sample data preparation
 
10
  data = {
11
  'transaction_amount': [2500, 799, 9338, 11749, 8999, 1500, 3000, 4000, 300, 5000, 24990],
12
  'transaction_date': ['01-11-2024 16:08', '01-11-2024 16:15', '02-11-2024 14:43', '03-11-2024 11:14',
 
26
 
27
  df = pd.DataFrame(data)
28
 
 
29
  df['transaction_date'] = pd.to_datetime(df['transaction_date'], format='%d-%m-%Y %H:%M')
30
 
 
 
31
  np.random.seed(42)
32
  df['is_fraud_predicted'] = np.random.choice([0, 1], size=len(df), p=[0.3, 0.7])
33
  df['is_fraud_reported'] = np.random.choice([0, 1], size=len(df), p=[0.4, 0.6])
 
35
  def filter_data(start_date, end_date, payer_id, payee_id, transaction_id):
36
  filtered_df = df.copy()
37
 
 
38
  start_date = pd.to_datetime(start_date)
39
  end_date = pd.to_datetime(end_date)
40
 
 
41
  filtered_df = filtered_df[(filtered_df['transaction_date'] >= start_date) &
42
  (filtered_df['transaction_date'] <= end_date)]
43
 
 
71
  else:
72
  return plt.figure()
73
 
 
74
  predicted = filtered_df.groupby(group_col)['is_fraud_predicted'].sum()
75
  reported = filtered_df.groupby(group_col)['is_fraud_reported'].sum()
76
 
 
77
  plot_df = pd.DataFrame({
78
  'Predicted Fraud': predicted,
79
  'Reported Fraud': reported
80
  })
81
 
 
82
  plot_df.plot(kind='bar', figsize=(10, 6))
83
  plt.title(f'Fraud Comparison by {dimension}')
84
  plt.ylabel('Count')
 
93
 
94
  plt.figure(figsize=(12, 6))
95
 
 
96
  if granularity == 'Day':
97
  time_group = filtered_df['transaction_date'].dt.date
98
  elif granularity == 'Hour':
 
102
  else:
103
  return plt.figure()
104
 
 
105
  predicted = filtered_df.groupby(time_group)['is_fraud_predicted'].sum()
106
  reported = filtered_df.groupby(time_group)['is_fraud_reported'].sum()
107
 
 
108
  plt.plot(predicted.index, predicted.values, 'b-', label='Predicted Fraud')
109
  plt.plot(reported.index, reported.values, 'r-', label='Reported Fraud')
110
  plt.title('Fraud Trend Over Time')
 
120
  if filtered_df.empty:
121
  return None, 0, 0
122
 
 
123
  cm = confusion_matrix(filtered_df['is_fraud'], filtered_df['is_fraud_predicted'])
124
 
 
125
  precision = precision_score(filtered_df['is_fraud'], filtered_df['is_fraud_predicted'], zero_division=0)
126
  recall = recall_score(filtered_df['is_fraud'], filtered_df['is_fraud_predicted'], zero_division=0)
127
 
 
128
  plt.figure(figsize=(6, 5))
129
  sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
130
  xticklabels=['Not Fraud', 'Fraud'],
 
136
  return plt, precision, recall
137
 
138
  def update_interface(start_date, end_date, payer_id, payee_id, transaction_id, dimension, time_granularity):
 
139
  filtered_df = filter_data(start_date, end_date, payer_id, payee_id, transaction_id)
140
 
 
141
  comparison_chart = create_comparison_chart(dimension, filtered_df)
142
 
 
143
  time_series = create_time_series(filtered_df, time_granularity)
144
 
 
145
  confusion_matrix_plot, precision, recall = calculate_metrics(filtered_df)
146
 
 
147
  display_df = filtered_df.copy()
148
  display_df['transaction_date'] = display_df['transaction_date'].dt.strftime('%Y-%m-%d %H:%M')
149
 
 
154
  f"Precision: {precision:.4f}",
155
  f"Recall: {recall:.4f}")
156
 
 
157
  with gr.Blocks() as demo:
158
  gr.Markdown("# Fraud Transaction Analysis Dashboard")
159
 
 
212
  outputs=[data_table, comparison_plot, time_series_plot, confusion_matrix_plot, precision_text, recall_text]
213
  )
214
 
 
215
  if __name__ == "__main__":
216
  demo.launch()