prernajeet01 committed on
Commit
a05090e
·
verified ·
1 Parent(s): 93ea1d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -44
app.py CHANGED
@@ -22,16 +22,23 @@ def analyze_transaction_with_ai(transaction_data, suspicious_transactions):
22
  return "OpenAI API key not found. Please add it to the Hugging Face Spaces secrets."
23
 
24
  try:
25
- # Prepare information for OpenAI
26
- suspicious_sample = suspicious_transactions.head(5).to_dict(orient='records')
 
 
 
 
 
 
 
27
 
28
  # Get summary statistics
29
  summary_stats = {
30
- "total_transactions": len(transaction_data),
31
- "flagged_transactions": len(suspicious_transactions),
32
- "flagged_percentage": round(len(suspicious_transactions) / len(transaction_data) * 100, 2),
33
- "avg_transaction_amount": round(transaction_data['amount'].mean(), 2),
34
- "suspicious_avg_amount": round(suspicious_transactions['amount'].mean(), 2)
35
  }
36
 
37
  # Create prompt for OpenAI
@@ -42,7 +49,7 @@ def analyze_transaction_with_ai(transaction_data, suspicious_transactions):
42
  {json.dumps(summary_stats)}
43
 
44
  Sample of Suspicious Transactions:
45
- {json.dumps(suspicious_sample)}
46
 
47
  Provide a concise fraud analysis report with:
48
  1. Key patterns and red flags in these transactions
@@ -136,8 +143,9 @@ def detect_fraud_and_anomalies(df):
136
 
137
  # Add time-based features if available
138
  if 'timestamp' in df.columns:
139
- features['hour_of_day'] = df['timestamp'].dt.hour
140
- features['day_of_week'] = df['timestamp'].dt.dayofweek
 
141
 
142
  # Add other relevant features if available
143
  if 'location' in df.columns:
@@ -163,7 +171,9 @@ def detect_fraud_and_anomalies(df):
163
 
164
  # 2. Transactions occurring at unusual hours (if timestamp available)
165
  if 'timestamp' in df.columns:
166
- df['unusual_hour'] = df['timestamp'].dt.hour.isin([0, 1, 2, 3, 4])
 
 
167
  else:
168
  df['unusual_hour'] = False
169
 
@@ -180,8 +190,12 @@ def detect_fraud_and_anomalies(df):
180
  if 'timestamp' in df.columns and ('user_id' in df.columns or 'account_id' in df.columns):
181
  id_col = 'user_id' if 'user_id' in df.columns else 'account_id'
182
  df = df.sort_values([id_col, 'timestamp'])
183
- # Fix for deprecation warning - convert to float explicitly
184
- df['time_diff'] = df.groupby(id_col)['timestamp'].diff().dt.total_seconds().fillna(0).astype(float)
 
 
 
 
185
  df['rapid_succession'] = df['time_diff'] < 300 # Less than 5 minutes
186
  else:
187
  df['rapid_succession'] = False
@@ -205,9 +219,14 @@ def create_visualizations(df):
205
  visualizations = {}
206
 
207
  try:
 
 
 
 
 
208
  # 1. Distribution of transaction amounts with anomalies highlighted
209
  fig1 = px.histogram(
210
- df, x='amount', color='is_suspicious',
211
  color_discrete_map={True: 'red', False: 'blue'},
212
  title='Distribution of Transaction Amounts',
213
  labels={'amount': 'Transaction Amount', 'is_suspicious': 'Suspicious'}
@@ -217,19 +236,19 @@ def create_visualizations(df):
217
  visualizations['amount_distribution'] = fig1
218
 
219
  # 2. Time series of transaction amounts
220
- if 'timestamp' in df.columns:
221
  fig2 = px.scatter(
222
- df, x='timestamp', y='amount', color='is_suspicious',
223
  color_discrete_map={True: 'red', False: 'blue'},
224
  title='Transaction Amounts Over Time',
225
- labels={'amount': 'Transaction Amount', 'timestamp': 'Time', 'is_suspicious': 'Suspicious'}
226
  )
227
  fig2.update_layout(height=500, width=700)
228
  visualizations['time_series'] = fig2
229
 
230
  # 3. Fraud score distribution
231
  fig3 = px.histogram(
232
- df, x='fraud_score',
233
  title='Distribution of Fraud Scores',
234
  labels={'fraud_score': 'Fraud Score'}
235
  )
@@ -237,12 +256,14 @@ def create_visualizations(df):
237
  visualizations['fraud_score_dist'] = fig3
238
 
239
  # 4. Hourly transaction pattern (if timestamp available)
240
- if 'timestamp' in df.columns:
241
- hourly_data = df.groupby([df['timestamp'].dt.hour, 'is_suspicious']).size().reset_index()
242
- hourly_data.columns = ['hour', 'is_suspicious', 'count'] # Rename columns
 
 
243
 
244
  fig4 = px.line(
245
- hourly_data, x='hour', y='count', color='is_suspicious',
246
  color_discrete_map={True: 'red', False: 'blue'},
247
  title='Hourly Transaction Pattern',
248
  labels={'hour': 'Hour of Day', 'count': 'Number of Transactions', 'is_suspicious': 'Suspicious'}
@@ -308,7 +329,9 @@ def process_transactions(file):
308
  )
309
 
310
  except Exception as e:
311
- return f"Error: {str(e)}", None, None, None, None, None
 
 
312
 
313
  def create_gradio_interface():
314
  """Create Gradio interface for the application"""
@@ -347,29 +370,10 @@ def create_gradio_interface():
347
 
348
  return app
349
 
350
- # For debugging purposes, set this to True to see more detailed error messages
351
- def enable_debug_mode():
352
  import logging
353
  logging.basicConfig(level=logging.DEBUG)
354
 
355
- # Override process_transactions to catch and log all exceptions
356
- global process_transactions
357
- original_process_transactions = process_transactions
358
-
359
- def debug_process_transactions(*args, **kwargs):
360
- try:
361
- return original_process_transactions(*args, **kwargs)
362
- except Exception as e:
363
- import traceback
364
- error_trace = traceback.format_exc()
365
- logging.error(f"Exception in process_transactions: {error_trace}")
366
- return f"Error: {str(e)}\n\nFull traceback:\n{error_trace}", None, None, None, None, None
367
-
368
- process_transactions = debug_process_transactions
369
-
370
- if __name__ == "__main__":
371
- # Uncomment to enable debug mode
372
- # enable_debug_mode()
373
-
374
  app = create_gradio_interface()
375
  app.launch(share=True)
 
22
  return "OpenAI API key not found. Please add it to the Hugging Face Spaces secrets."
23
 
24
  try:
25
+ # Prepare information for OpenAI, converting to a JSON-serializable format
26
+ suspicious_sample = suspicious_transactions.head(5).copy()
27
+
28
+ # Convert timestamp to string format to make it JSON serializable
29
+ if 'timestamp' in suspicious_sample.columns:
30
+ suspicious_sample['timestamp'] = suspicious_sample['timestamp'].astype(str)
31
+
32
+ # Convert to dictionary
33
+ suspicious_dict = suspicious_sample.to_dict(orient='records')
34
 
35
  # Get summary statistics
36
  summary_stats = {
37
+ "total_transactions": int(len(transaction_data)),
38
+ "flagged_transactions": int(len(suspicious_transactions)),
39
+ "flagged_percentage": float(round(len(suspicious_transactions) / len(transaction_data) * 100, 2)),
40
+ "avg_transaction_amount": float(round(transaction_data['amount'].mean(), 2)),
41
+ "suspicious_avg_amount": float(round(suspicious_transactions['amount'].mean(), 2))
42
  }
43
 
44
  # Create prompt for OpenAI
 
49
  {json.dumps(summary_stats)}
50
 
51
  Sample of Suspicious Transactions:
52
+ {json.dumps(suspicious_dict)}
53
 
54
  Provide a concise fraud analysis report with:
55
  1. Key patterns and red flags in these transactions
 
143
 
144
  # Add time-based features if available
145
  if 'timestamp' in df.columns:
146
+ # Extract hour and day of week without using .dt.to_pydatetime()
147
+ features['hour_of_day'] = pd.to_numeric(df['timestamp'].dt.hour)
148
+ features['day_of_week'] = pd.to_numeric(df['timestamp'].dt.dayofweek)
149
 
150
  # Add other relevant features if available
151
  if 'location' in df.columns:
 
171
 
172
  # 2. Transactions occurring at unusual hours (if timestamp available)
173
  if 'timestamp' in df.columns:
174
+ # Fix for datetime warning
175
+ hours = np.array(df['timestamp'].dt.hour)
176
+ df['unusual_hour'] = np.isin(hours, [0, 1, 2, 3, 4])
177
  else:
178
  df['unusual_hour'] = False
179
 
 
190
  if 'timestamp' in df.columns and ('user_id' in df.columns or 'account_id' in df.columns):
191
  id_col = 'user_id' if 'user_id' in df.columns else 'account_id'
192
  df = df.sort_values([id_col, 'timestamp'])
193
+
194
+ # Fix for datetime warning by using numpy arrays
195
+ time_diffs = df.groupby(id_col)['timestamp'].diff()
196
+ # Convert to seconds and handle NaN values
197
+ seconds = np.array([td.total_seconds() if pd.notnull(td) else 0 for td in time_diffs])
198
+ df['time_diff'] = seconds
199
  df['rapid_succession'] = df['time_diff'] < 300 # Less than 5 minutes
200
  else:
201
  df['rapid_succession'] = False
 
219
  visualizations = {}
220
 
221
  try:
222
+ # Convert timestamp to string for plotly to avoid datetime warning
223
+ plot_df = df.copy()
224
+ if 'timestamp' in plot_df.columns:
225
+ plot_df['timestamp_str'] = plot_df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
226
+
227
  # 1. Distribution of transaction amounts with anomalies highlighted
228
  fig1 = px.histogram(
229
+ plot_df, x='amount', color='is_suspicious',
230
  color_discrete_map={True: 'red', False: 'blue'},
231
  title='Distribution of Transaction Amounts',
232
  labels={'amount': 'Transaction Amount', 'is_suspicious': 'Suspicious'}
 
236
  visualizations['amount_distribution'] = fig1
237
 
238
  # 2. Time series of transaction amounts
239
+ if 'timestamp' in plot_df.columns:
240
  fig2 = px.scatter(
241
+ plot_df, x='timestamp_str', y='amount', color='is_suspicious',
242
  color_discrete_map={True: 'red', False: 'blue'},
243
  title='Transaction Amounts Over Time',
244
+ labels={'amount': 'Transaction Amount', 'timestamp_str': 'Time', 'is_suspicious': 'Suspicious'}
245
  )
246
  fig2.update_layout(height=500, width=700)
247
  visualizations['time_series'] = fig2
248
 
249
  # 3. Fraud score distribution
250
  fig3 = px.histogram(
251
+ plot_df, x='fraud_score',
252
  title='Distribution of Fraud Scores',
253
  labels={'fraud_score': 'Fraud Score'}
254
  )
 
256
  visualizations['fraud_score_dist'] = fig3
257
 
258
  # 4. Hourly transaction pattern (if timestamp available)
259
+ if 'timestamp' in plot_df.columns:
260
+ # Fixed approach to get hourly data
261
+ hourly_counts = plot_df.groupby([plot_df['timestamp'].dt.hour, 'is_suspicious']).size()
262
+ hourly_df = hourly_counts.reset_index()
263
+ hourly_df.columns = ['hour', 'is_suspicious', 'count']
264
 
265
  fig4 = px.line(
266
+ hourly_df, x='hour', y='count', color='is_suspicious',
267
  color_discrete_map={True: 'red', False: 'blue'},
268
  title='Hourly Transaction Pattern',
269
  labels={'hour': 'Hour of Day', 'count': 'Number of Transactions', 'is_suspicious': 'Suspicious'}
 
329
  )
330
 
331
  except Exception as e:
332
+ import traceback
333
+ error_trace = traceback.format_exc()
334
+ return f"Error: {str(e)}\n\nTrace: {error_trace}", None, None, None, None, None
335
 
336
  def create_gradio_interface():
337
  """Create Gradio interface for the application"""
 
370
 
371
  return app
372
 
373
+ if __name__ == "__main__":
374
+ # Enable debug mode to get detailed error messages
375
  import logging
376
  logging.basicConfig(level=logging.DEBUG)
377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  app = create_gradio_interface()
379
  app.launch(share=True)