Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,16 +22,23 @@ def analyze_transaction_with_ai(transaction_data, suspicious_transactions):
|
|
| 22 |
return "OpenAI API key not found. Please add it to the Hugging Face Spaces secrets."
|
| 23 |
|
| 24 |
try:
|
| 25 |
-
# Prepare information for OpenAI
|
| 26 |
-
suspicious_sample = suspicious_transactions.head(5).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# Get summary statistics
|
| 29 |
summary_stats = {
|
| 30 |
-
"total_transactions": len(transaction_data),
|
| 31 |
-
"flagged_transactions": len(suspicious_transactions),
|
| 32 |
-
"flagged_percentage": round(len(suspicious_transactions) / len(transaction_data) * 100, 2),
|
| 33 |
-
"avg_transaction_amount": round(transaction_data['amount'].mean(), 2),
|
| 34 |
-
"suspicious_avg_amount": round(suspicious_transactions['amount'].mean(), 2)
|
| 35 |
}
|
| 36 |
|
| 37 |
# Create prompt for OpenAI
|
|
@@ -42,7 +49,7 @@ def analyze_transaction_with_ai(transaction_data, suspicious_transactions):
|
|
| 42 |
{json.dumps(summary_stats)}
|
| 43 |
|
| 44 |
Sample of Suspicious Transactions:
|
| 45 |
-
{json.dumps(
|
| 46 |
|
| 47 |
Provide a concise fraud analysis report with:
|
| 48 |
1. Key patterns and red flags in these transactions
|
|
@@ -136,8 +143,9 @@ def detect_fraud_and_anomalies(df):
|
|
| 136 |
|
| 137 |
# Add time-based features if available
|
| 138 |
if 'timestamp' in df.columns:
|
| 139 |
-
|
| 140 |
-
features['
|
|
|
|
| 141 |
|
| 142 |
# Add other relevant features if available
|
| 143 |
if 'location' in df.columns:
|
|
@@ -163,7 +171,9 @@ def detect_fraud_and_anomalies(df):
|
|
| 163 |
|
| 164 |
# 2. Transactions occurring at unusual hours (if timestamp available)
|
| 165 |
if 'timestamp' in df.columns:
|
| 166 |
-
|
|
|
|
|
|
|
| 167 |
else:
|
| 168 |
df['unusual_hour'] = False
|
| 169 |
|
|
@@ -180,8 +190,12 @@ def detect_fraud_and_anomalies(df):
|
|
| 180 |
if 'timestamp' in df.columns and ('user_id' in df.columns or 'account_id' in df.columns):
|
| 181 |
id_col = 'user_id' if 'user_id' in df.columns else 'account_id'
|
| 182 |
df = df.sort_values([id_col, 'timestamp'])
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
df['rapid_succession'] = df['time_diff'] < 300 # Less than 5 minutes
|
| 186 |
else:
|
| 187 |
df['rapid_succession'] = False
|
|
@@ -205,9 +219,14 @@ def create_visualizations(df):
|
|
| 205 |
visualizations = {}
|
| 206 |
|
| 207 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
# 1. Distribution of transaction amounts with anomalies highlighted
|
| 209 |
fig1 = px.histogram(
|
| 210 |
-
|
| 211 |
color_discrete_map={True: 'red', False: 'blue'},
|
| 212 |
title='Distribution of Transaction Amounts',
|
| 213 |
labels={'amount': 'Transaction Amount', 'is_suspicious': 'Suspicious'}
|
|
@@ -217,19 +236,19 @@ def create_visualizations(df):
|
|
| 217 |
visualizations['amount_distribution'] = fig1
|
| 218 |
|
| 219 |
# 2. Time series of transaction amounts
|
| 220 |
-
if 'timestamp' in
|
| 221 |
fig2 = px.scatter(
|
| 222 |
-
|
| 223 |
color_discrete_map={True: 'red', False: 'blue'},
|
| 224 |
title='Transaction Amounts Over Time',
|
| 225 |
-
labels={'amount': 'Transaction Amount', '
|
| 226 |
)
|
| 227 |
fig2.update_layout(height=500, width=700)
|
| 228 |
visualizations['time_series'] = fig2
|
| 229 |
|
| 230 |
# 3. Fraud score distribution
|
| 231 |
fig3 = px.histogram(
|
| 232 |
-
|
| 233 |
title='Distribution of Fraud Scores',
|
| 234 |
labels={'fraud_score': 'Fraud Score'}
|
| 235 |
)
|
|
@@ -237,12 +256,14 @@ def create_visualizations(df):
|
|
| 237 |
visualizations['fraud_score_dist'] = fig3
|
| 238 |
|
| 239 |
# 4. Hourly transaction pattern (if timestamp available)
|
| 240 |
-
if 'timestamp' in
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
| 243 |
|
| 244 |
fig4 = px.line(
|
| 245 |
-
|
| 246 |
color_discrete_map={True: 'red', False: 'blue'},
|
| 247 |
title='Hourly Transaction Pattern',
|
| 248 |
labels={'hour': 'Hour of Day', 'count': 'Number of Transactions', 'is_suspicious': 'Suspicious'}
|
|
@@ -308,7 +329,9 @@ def process_transactions(file):
|
|
| 308 |
)
|
| 309 |
|
| 310 |
except Exception as e:
|
| 311 |
-
|
|
|
|
|
|
|
| 312 |
|
| 313 |
def create_gradio_interface():
|
| 314 |
"""Create Gradio interface for the application"""
|
|
@@ -347,29 +370,10 @@ def create_gradio_interface():
|
|
| 347 |
|
| 348 |
return app
|
| 349 |
|
| 350 |
-
|
| 351 |
-
|
| 352 |
import logging
|
| 353 |
logging.basicConfig(level=logging.DEBUG)
|
| 354 |
|
| 355 |
-
# Override process_transactions to catch and log all exceptions
|
| 356 |
-
global process_transactions
|
| 357 |
-
original_process_transactions = process_transactions
|
| 358 |
-
|
| 359 |
-
def debug_process_transactions(*args, **kwargs):
|
| 360 |
-
try:
|
| 361 |
-
return original_process_transactions(*args, **kwargs)
|
| 362 |
-
except Exception as e:
|
| 363 |
-
import traceback
|
| 364 |
-
error_trace = traceback.format_exc()
|
| 365 |
-
logging.error(f"Exception in process_transactions: {error_trace}")
|
| 366 |
-
return f"Error: {str(e)}\n\nFull traceback:\n{error_trace}", None, None, None, None, None
|
| 367 |
-
|
| 368 |
-
process_transactions = debug_process_transactions
|
| 369 |
-
|
| 370 |
-
if __name__ == "__main__":
|
| 371 |
-
# Uncomment to enable debug mode
|
| 372 |
-
# enable_debug_mode()
|
| 373 |
-
|
| 374 |
app = create_gradio_interface()
|
| 375 |
app.launch(share=True)
|
|
|
|
| 22 |
return "OpenAI API key not found. Please add it to the Hugging Face Spaces secrets."
|
| 23 |
|
| 24 |
try:
|
| 25 |
+
# Prepare information for OpenAI, converting to a JSON-serializable format
|
| 26 |
+
suspicious_sample = suspicious_transactions.head(5).copy()
|
| 27 |
+
|
| 28 |
+
# Convert timestamp to string format to make it JSON serializable
|
| 29 |
+
if 'timestamp' in suspicious_sample.columns:
|
| 30 |
+
suspicious_sample['timestamp'] = suspicious_sample['timestamp'].astype(str)
|
| 31 |
+
|
| 32 |
+
# Convert to dictionary
|
| 33 |
+
suspicious_dict = suspicious_sample.to_dict(orient='records')
|
| 34 |
|
| 35 |
# Get summary statistics
|
| 36 |
summary_stats = {
|
| 37 |
+
"total_transactions": int(len(transaction_data)),
|
| 38 |
+
"flagged_transactions": int(len(suspicious_transactions)),
|
| 39 |
+
"flagged_percentage": float(round(len(suspicious_transactions) / len(transaction_data) * 100, 2)),
|
| 40 |
+
"avg_transaction_amount": float(round(transaction_data['amount'].mean(), 2)),
|
| 41 |
+
"suspicious_avg_amount": float(round(suspicious_transactions['amount'].mean(), 2))
|
| 42 |
}
|
| 43 |
|
| 44 |
# Create prompt for OpenAI
|
|
|
|
| 49 |
{json.dumps(summary_stats)}
|
| 50 |
|
| 51 |
Sample of Suspicious Transactions:
|
| 52 |
+
{json.dumps(suspicious_dict)}
|
| 53 |
|
| 54 |
Provide a concise fraud analysis report with:
|
| 55 |
1. Key patterns and red flags in these transactions
|
|
|
|
| 143 |
|
| 144 |
# Add time-based features if available
|
| 145 |
if 'timestamp' in df.columns:
|
| 146 |
+
# Extract hour and day of week without using .dt.to_pydatetime()
|
| 147 |
+
features['hour_of_day'] = pd.to_numeric(df['timestamp'].dt.hour)
|
| 148 |
+
features['day_of_week'] = pd.to_numeric(df['timestamp'].dt.dayofweek)
|
| 149 |
|
| 150 |
# Add other relevant features if available
|
| 151 |
if 'location' in df.columns:
|
|
|
|
| 171 |
|
| 172 |
# 2. Transactions occurring at unusual hours (if timestamp available)
|
| 173 |
if 'timestamp' in df.columns:
|
| 174 |
+
# Fix for datetime warning
|
| 175 |
+
hours = np.array(df['timestamp'].dt.hour)
|
| 176 |
+
df['unusual_hour'] = np.isin(hours, [0, 1, 2, 3, 4])
|
| 177 |
else:
|
| 178 |
df['unusual_hour'] = False
|
| 179 |
|
|
|
|
| 190 |
if 'timestamp' in df.columns and ('user_id' in df.columns or 'account_id' in df.columns):
|
| 191 |
id_col = 'user_id' if 'user_id' in df.columns else 'account_id'
|
| 192 |
df = df.sort_values([id_col, 'timestamp'])
|
| 193 |
+
|
| 194 |
+
# Fix for datetime warning by using numpy arrays
|
| 195 |
+
time_diffs = df.groupby(id_col)['timestamp'].diff()
|
| 196 |
+
# Convert to seconds and handle NaN values
|
| 197 |
+
seconds = np.array([td.total_seconds() if pd.notnull(td) else 0 for td in time_diffs])
|
| 198 |
+
df['time_diff'] = seconds
|
| 199 |
df['rapid_succession'] = df['time_diff'] < 300 # Less than 5 minutes
|
| 200 |
else:
|
| 201 |
df['rapid_succession'] = False
|
|
|
|
| 219 |
visualizations = {}
|
| 220 |
|
| 221 |
try:
|
| 222 |
+
# Convert timestamp to string for plotly to avoid datetime warning
|
| 223 |
+
plot_df = df.copy()
|
| 224 |
+
if 'timestamp' in plot_df.columns:
|
| 225 |
+
plot_df['timestamp_str'] = plot_df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
|
| 226 |
+
|
| 227 |
# 1. Distribution of transaction amounts with anomalies highlighted
|
| 228 |
fig1 = px.histogram(
|
| 229 |
+
plot_df, x='amount', color='is_suspicious',
|
| 230 |
color_discrete_map={True: 'red', False: 'blue'},
|
| 231 |
title='Distribution of Transaction Amounts',
|
| 232 |
labels={'amount': 'Transaction Amount', 'is_suspicious': 'Suspicious'}
|
|
|
|
| 236 |
visualizations['amount_distribution'] = fig1
|
| 237 |
|
| 238 |
# 2. Time series of transaction amounts
|
| 239 |
+
if 'timestamp' in plot_df.columns:
|
| 240 |
fig2 = px.scatter(
|
| 241 |
+
plot_df, x='timestamp_str', y='amount', color='is_suspicious',
|
| 242 |
color_discrete_map={True: 'red', False: 'blue'},
|
| 243 |
title='Transaction Amounts Over Time',
|
| 244 |
+
labels={'amount': 'Transaction Amount', 'timestamp_str': 'Time', 'is_suspicious': 'Suspicious'}
|
| 245 |
)
|
| 246 |
fig2.update_layout(height=500, width=700)
|
| 247 |
visualizations['time_series'] = fig2
|
| 248 |
|
| 249 |
# 3. Fraud score distribution
|
| 250 |
fig3 = px.histogram(
|
| 251 |
+
plot_df, x='fraud_score',
|
| 252 |
title='Distribution of Fraud Scores',
|
| 253 |
labels={'fraud_score': 'Fraud Score'}
|
| 254 |
)
|
|
|
|
| 256 |
visualizations['fraud_score_dist'] = fig3
|
| 257 |
|
| 258 |
# 4. Hourly transaction pattern (if timestamp available)
|
| 259 |
+
if 'timestamp' in plot_df.columns:
|
| 260 |
+
# Fixed approach to get hourly data
|
| 261 |
+
hourly_counts = plot_df.groupby([plot_df['timestamp'].dt.hour, 'is_suspicious']).size()
|
| 262 |
+
hourly_df = hourly_counts.reset_index()
|
| 263 |
+
hourly_df.columns = ['hour', 'is_suspicious', 'count']
|
| 264 |
|
| 265 |
fig4 = px.line(
|
| 266 |
+
hourly_df, x='hour', y='count', color='is_suspicious',
|
| 267 |
color_discrete_map={True: 'red', False: 'blue'},
|
| 268 |
title='Hourly Transaction Pattern',
|
| 269 |
labels={'hour': 'Hour of Day', 'count': 'Number of Transactions', 'is_suspicious': 'Suspicious'}
|
|
|
|
| 329 |
)
|
| 330 |
|
| 331 |
except Exception as e:
|
| 332 |
+
import traceback
|
| 333 |
+
error_trace = traceback.format_exc()
|
| 334 |
+
return f"Error: {str(e)}\n\nTrace: {error_trace}", None, None, None, None, None
|
| 335 |
|
| 336 |
def create_gradio_interface():
|
| 337 |
"""Create Gradio interface for the application"""
|
|
|
|
| 370 |
|
| 371 |
return app
|
| 372 |
|
| 373 |
+
if __name__ == "__main__":
|
| 374 |
+
# Enable debug mode to get detailed error messages
|
| 375 |
import logging
|
| 376 |
logging.basicConfig(level=logging.DEBUG)
|
| 377 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
app = create_gradio_interface()
|
| 379 |
app.launch(share=True)
|