Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,6 +11,7 @@ from sklearn.preprocessing import StandardScaler
|
|
| 11 |
import openai
|
| 12 |
from datetime import datetime, timedelta
|
| 13 |
import json
|
|
|
|
| 14 |
|
| 15 |
# Set OpenAI API key from Hugging Face Spaces secrets
|
| 16 |
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
@@ -179,7 +180,8 @@ def detect_fraud_and_anomalies(df):
|
|
| 179 |
if 'timestamp' in df.columns and ('user_id' in df.columns or 'account_id' in df.columns):
|
| 180 |
id_col = 'user_id' if 'user_id' in df.columns else 'account_id'
|
| 181 |
df = df.sort_values([id_col, 'timestamp'])
|
| 182 |
-
|
|
|
|
| 183 |
df['rapid_succession'] = df['time_diff'] < 300 # Less than 5 minutes
|
| 184 |
else:
|
| 185 |
df['rapid_succession'] = False
|
|
@@ -202,43 +204,54 @@ def create_visualizations(df):
|
|
| 202 |
"""Create visualizations for transaction data and anomalies"""
|
| 203 |
visualizations = {}
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
title='Distribution of Transaction Amounts',
|
| 210 |
-
labels={'amount': 'Transaction Amount', 'is_suspicious': 'Suspicious'}
|
| 211 |
-
)
|
| 212 |
-
visualizations['amount_distribution'] = fig1
|
| 213 |
-
|
| 214 |
-
# 2. Time series of transaction amounts
|
| 215 |
-
if 'timestamp' in df.columns:
|
| 216 |
-
fig2 = px.scatter(
|
| 217 |
-
df, x='timestamp', y='amount', color='is_suspicious',
|
| 218 |
color_discrete_map={True: 'red', False: 'blue'},
|
| 219 |
-
title='Transaction Amounts Over Time',
|
| 220 |
-
labels={'amount': 'Transaction Amount', 'timestamp': 'Time', 'is_suspicious': 'Suspicious'}
|
| 221 |
)
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
| 240 |
)
|
| 241 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
return visualizations
|
| 244 |
|
|
@@ -279,11 +292,16 @@ def process_transactions(file):
|
|
| 279 |
# Get AI analysis of suspicious transactions
|
| 280 |
ai_analysis = analyze_transaction_with_ai(df_with_anomalies, suspicious_transactions)
|
| 281 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
# Return results and visualizations
|
| 283 |
return (
|
| 284 |
stats_summary,
|
| 285 |
ai_analysis,
|
| 286 |
-
|
| 287 |
visualizations.get('amount_distribution', None),
|
| 288 |
visualizations.get('time_series', None),
|
| 289 |
visualizations.get('fraud_score_dist', None)
|
|
@@ -329,6 +347,29 @@ def create_gradio_interface():
|
|
| 329 |
|
| 330 |
return app
|
| 331 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
| 333 |
app = create_gradio_interface()
|
| 334 |
app.launch(share=True)
|
|
|
|
| 11 |
import openai
|
| 12 |
from datetime import datetime, timedelta
|
| 13 |
import json
|
| 14 |
+
import tempfile
|
| 15 |
|
| 16 |
# Set OpenAI API key from Hugging Face Spaces secrets
|
| 17 |
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
|
|
| 180 |
if 'timestamp' in df.columns and ('user_id' in df.columns or 'account_id' in df.columns):
|
| 181 |
id_col = 'user_id' if 'user_id' in df.columns else 'account_id'
|
| 182 |
df = df.sort_values([id_col, 'timestamp'])
|
| 183 |
+
# Fix for deprecation warning - convert to float explicitly
|
| 184 |
+
df['time_diff'] = df.groupby(id_col)['timestamp'].diff().dt.total_seconds().fillna(0).astype(float)
|
| 185 |
df['rapid_succession'] = df['time_diff'] < 300 # Less than 5 minutes
|
| 186 |
else:
|
| 187 |
df['rapid_succession'] = False
|
|
|
|
| 204 |
"""Create visualizations for transaction data and anomalies"""
|
| 205 |
visualizations = {}
|
| 206 |
|
| 207 |
+
try:
|
| 208 |
+
# 1. Distribution of transaction amounts with anomalies highlighted
|
| 209 |
+
fig1 = px.histogram(
|
| 210 |
+
df, x='amount', color='is_suspicious',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
color_discrete_map={True: 'red', False: 'blue'},
|
| 212 |
+
title='Distribution of Transaction Amounts',
|
| 213 |
+
labels={'amount': 'Transaction Amount', 'is_suspicious': 'Suspicious'}
|
| 214 |
)
|
| 215 |
+
# Ensure the figure is fully rendered
|
| 216 |
+
fig1.update_layout(height=500, width=700)
|
| 217 |
+
visualizations['amount_distribution'] = fig1
|
| 218 |
+
|
| 219 |
+
# 2. Time series of transaction amounts
|
| 220 |
+
if 'timestamp' in df.columns:
|
| 221 |
+
fig2 = px.scatter(
|
| 222 |
+
df, x='timestamp', y='amount', color='is_suspicious',
|
| 223 |
+
color_discrete_map={True: 'red', False: 'blue'},
|
| 224 |
+
title='Transaction Amounts Over Time',
|
| 225 |
+
labels={'amount': 'Transaction Amount', 'timestamp': 'Time', 'is_suspicious': 'Suspicious'}
|
| 226 |
+
)
|
| 227 |
+
fig2.update_layout(height=500, width=700)
|
| 228 |
+
visualizations['time_series'] = fig2
|
| 229 |
+
|
| 230 |
+
# 3. Fraud score distribution
|
| 231 |
+
fig3 = px.histogram(
|
| 232 |
+
df, x='fraud_score',
|
| 233 |
+
title='Distribution of Fraud Scores',
|
| 234 |
+
labels={'fraud_score': 'Fraud Score'}
|
| 235 |
)
|
| 236 |
+
fig3.update_layout(height=500, width=700)
|
| 237 |
+
visualizations['fraud_score_dist'] = fig3
|
| 238 |
+
|
| 239 |
+
# 4. Hourly transaction pattern (if timestamp available)
|
| 240 |
+
if 'timestamp' in df.columns:
|
| 241 |
+
hourly_data = df.groupby([df['timestamp'].dt.hour, 'is_suspicious']).size().reset_index()
|
| 242 |
+
hourly_data.columns = ['hour', 'is_suspicious', 'count'] # Rename columns
|
| 243 |
+
|
| 244 |
+
fig4 = px.line(
|
| 245 |
+
hourly_data, x='hour', y='count', color='is_suspicious',
|
| 246 |
+
color_discrete_map={True: 'red', False: 'blue'},
|
| 247 |
+
title='Hourly Transaction Pattern',
|
| 248 |
+
labels={'hour': 'Hour of Day', 'count': 'Number of Transactions', 'is_suspicious': 'Suspicious'}
|
| 249 |
+
)
|
| 250 |
+
fig4.update_layout(height=500, width=700)
|
| 251 |
+
visualizations['hourly_pattern'] = fig4
|
| 252 |
+
|
| 253 |
+
except Exception as e:
|
| 254 |
+
print(f"Error in visualization creation: {str(e)}")
|
| 255 |
|
| 256 |
return visualizations
|
| 257 |
|
|
|
|
| 292 |
# Get AI analysis of suspicious transactions
|
| 293 |
ai_analysis = analyze_transaction_with_ai(df_with_anomalies, suspicious_transactions)
|
| 294 |
|
| 295 |
+
# Save suspicious transactions to a temporary file
|
| 296 |
+
temp_csv = tempfile.NamedTemporaryFile(delete=False, suffix='.csv')
|
| 297 |
+
suspicious_transactions.to_csv(temp_csv.name, index=False)
|
| 298 |
+
temp_csv.close()
|
| 299 |
+
|
| 300 |
# Return results and visualizations
|
| 301 |
return (
|
| 302 |
stats_summary,
|
| 303 |
ai_analysis,
|
| 304 |
+
temp_csv.name, # Return the path to the temporary file
|
| 305 |
visualizations.get('amount_distribution', None),
|
| 306 |
visualizations.get('time_series', None),
|
| 307 |
visualizations.get('fraud_score_dist', None)
|
|
|
|
| 347 |
|
| 348 |
return app
|
| 349 |
|
| 350 |
+
# For debugging purposes, set this to True to see more detailed error messages
|
| 351 |
+
def enable_debug_mode():
|
| 352 |
+
import logging
|
| 353 |
+
logging.basicConfig(level=logging.DEBUG)
|
| 354 |
+
|
| 355 |
+
# Override process_transactions to catch and log all exceptions
|
| 356 |
+
global process_transactions
|
| 357 |
+
original_process_transactions = process_transactions
|
| 358 |
+
|
| 359 |
+
def debug_process_transactions(*args, **kwargs):
|
| 360 |
+
try:
|
| 361 |
+
return original_process_transactions(*args, **kwargs)
|
| 362 |
+
except Exception as e:
|
| 363 |
+
import traceback
|
| 364 |
+
error_trace = traceback.format_exc()
|
| 365 |
+
logging.error(f"Exception in process_transactions: {error_trace}")
|
| 366 |
+
return f"Error: {str(e)}\n\nFull traceback:\n{error_trace}", None, None, None, None, None
|
| 367 |
+
|
| 368 |
+
process_transactions = debug_process_transactions
|
| 369 |
+
|
| 370 |
if __name__ == "__main__":
|
| 371 |
+
# Uncomment to enable debug mode
|
| 372 |
+
# enable_debug_mode()
|
| 373 |
+
|
| 374 |
app = create_gradio_interface()
|
| 375 |
app.launch(share=True)
|