Spaces:
Sleeping
Sleeping
File size: 7,413 Bytes
69cbf5f 405dcde 69cbf5f 7ca1795 405dcde 7ca1795 69cbf5f 7ca1795 69cbf5f 405dcde 69cbf5f 7ca1795 69cbf5f 7ca1795 69cbf5f 7ca1795 69cbf5f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 |
import base64
import io
import os

import matplotlib
matplotlib.use('Agg')  # select the non-interactive backend BEFORE pyplot is imported
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import gradio as gr
from PIL import Image
from openai import OpenAI
from sklearn.cluster import KMeans
from sklearn.preprocessing import scale
from sklearn.svm import OneClassSVM
# Path to the transactions CSV bundled with the app's environment.
CSV_PATH = 'FI_Transactions.csv'

# Get OpenAI API key from environment variables. The client is only
# constructed when a key is actually present: OpenAI(api_key=None) raises at
# import time, which would prevent the whole app from starting even though
# get_ai_insights() already degrades gracefully when the key is missing.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
def detect_anomalies(nu_value, n_clusters):
    """Detect anomalous transactions and cluster the data for visualization.

    Args:
        nu_value: One-Class SVM ``nu`` parameter — an upper bound on the
            fraction of training points flagged as anomalies.
        n_clusters: number of clusters for the KMeans view.

    Returns:
        Tuple of (pie chart image, KMeans scatter image, SVM scatter image,
        HTML summary string, HTML table of the anomalous rows).
    """
    # Read the CSV file from the environment.
    df = pd.read_csv(CSV_PATH)

    # Use only numeric columns; scale() standardizes each feature to
    # zero mean / unit variance, which both SVM and KMeans expect.
    features = df.select_dtypes(include=[np.number])
    feature_names = features.columns.tolist()
    scaled_features = scale(features)

    # Train One-Class SVM for anomaly detection; prediction -1 == anomaly.
    svm_model = OneClassSVM(kernel='rbf', nu=nu_value, gamma='scale')
    svm_model.fit(scaled_features)
    svm_preds = svm_model.predict(scaled_features)
    df['SVM_Anomaly'] = ['Anomaly' if x == -1 else 'Normal' for x in svm_preds]

    # Count anomalies.
    anomaly_count = int((df['SVM_Anomaly'] == 'Anomaly').sum())
    normal_count = int((df['SVM_Anomaly'] == 'Normal').sum())

    # Train KMeans for clustering. n_init is pinned explicitly so the result
    # does not change with sklearn's version-dependent default.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    kmeans.fit(scaled_features)
    df['KMeans_Cluster'] = kmeans.labels_

    # Axes for the 2-D scatter plots: the first two numeric features,
    # falling back to the first feature on both axes if only one exists.
    x_feature = 0
    y_feature = 1 if len(feature_names) > 1 else 0
    x_label = feature_names[x_feature] if feature_names else "Feature 1"
    y_label = feature_names[y_feature] if len(feature_names) > y_feature else "Feature 2"

    # 1. Anomalies count pie chart.
    plt.figure(figsize=(8, 6))
    plt.pie([anomaly_count, normal_count], labels=['Anomalies', 'Normal'],
            autopct='%1.1f%%', colors=['#FF9999', '#66B2FF'])
    plt.title('SVM Anomaly Detection Results')
    pie_chart_img = plt_to_img()

    # 2. KMeans clustering scatter plot (using the first two features).
    plt.figure(figsize=(10, 6))
    scatter = plt.scatter(scaled_features[:, x_feature],
                          scaled_features[:, y_feature],
                          c=kmeans.labels_,
                          cmap='viridis',
                          alpha=0.7)
    plt.colorbar(scatter, label='Cluster')
    plt.title('KMeans Clustering Results')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    kmeans_img = plt_to_img()

    # 3. SVM anomalies scatter plot. Plot the two classes as separate
    # scatter calls with label= so the legend entries actually match the
    # colors — a single call with a per-point color list plus
    # plt.legend(['Anomaly', 'Normal']) produces a meaningless legend.
    plt.figure(figsize=(10, 6))
    anomaly_mask = (df['SVM_Anomaly'] == 'Anomaly').to_numpy()
    plt.scatter(scaled_features[anomaly_mask, x_feature],
                scaled_features[anomaly_mask, y_feature],
                c='red', alpha=0.7, label='Anomaly')
    plt.scatter(scaled_features[~anomaly_mask, x_feature],
                scaled_features[~anomaly_mask, y_feature],
                c='blue', alpha=0.7, label='Normal')
    plt.title('SVM Anomaly Detection')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()
    svm_img = plt_to_img()

    # Create summary dataframe of anomalies for the UI table.
    anomalies_df = df[df['SVM_Anomaly'] == 'Anomaly'].reset_index()

    # Get AI insights about anomalies (degrades gracefully without an API key).
    ai_insights = get_ai_insights(df, anomalies_df)

    # Convert the dataframe to HTML for display.
    anomalies_html = anomalies_df.to_html(classes='table table-striped')

    # Create HTML summary.
    summary_html = f"""
    <h3>Analysis Summary</h3>
    <p>Total transactions: {len(df)}</p>
    <p>Anomalies detected: {anomaly_count} ({anomaly_count/len(df)*100:.2f}%)</p>
    <p>Normal transactions: {normal_count} ({normal_count/len(df)*100:.2f}%)</p>
    <h3>AI Insights</h3>
    <p>{ai_insights}</p>
    """
    return pie_chart_img, kmeans_img, svm_img, summary_html, anomalies_html
def get_ai_insights(df, anomalies_df):
    """Get insights about the anomalies using OpenAI API"""
    # Guard clause: without a key there is nothing to call.
    if not OPENAI_API_KEY:
        return "OpenAI API key not found in environment variables. AI insights are unavailable."
    try:
        # Summarize the dataset and a small sample of anomalies for the prompt.
        stats_text = df.describe().to_string()
        if anomalies_df.empty:
            sample_text = "No anomalies detected"
        else:
            sample_text = anomalies_df.head(5).to_string()

        prompt = f"""
        Analyze the following financial transaction data and detected anomalies:
        Dataset Statistics:
        {stats_text}
        Sample Anomalies (top 5):
        {sample_text}
        Please provide:
        1. Possible patterns or reasons for these anomalies
        2. Recommendations for further investigation
        3. Potential risk factors these anomalies might indicate
        Keep your analysis concise and focused on financial fraud detection.
        """

        # Call the OpenAI chat-completions API (new client format).
        messages = [
            {"role": "system", "content": "You are a financial fraud detection expert."},
            {"role": "user", "content": prompt},
        ]
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=500,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort feature: surface the failure as text rather than crashing the UI.
        return f"Could not generate AI insights. Error: {str(e)}"
def plt_to_img():
    """Render the current matplotlib figure to a PIL image and close it.

    Returns:
        PIL.Image.Image: a fully-loaded PNG rendering of the current figure.

    Closing the figure here matters: pyplot keeps every open figure alive,
    so without plt.close() each Gradio interaction leaks three figures.
    """
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close()  # release the figure now that its pixels are in the buffer
    buf.seek(0)
    img = Image.open(buf)
    img.load()  # force a full read so the image no longer depends on buf
    return img
# Create the Gradio interface.
# Layout: a controls column + summary column on top, then three rows of
# result images and the anomalies table underneath.
with gr.Blocks(title="Financial Transaction Anomaly Detection") as demo:
    gr.Markdown("# Financial Transaction Anomaly Detection")
    gr.Markdown(f"Analyzing data from {CSV_PATH}")
    with gr.Row():
        with gr.Column():
            # nu bounds the fraction of points flagged as anomalies by the SVM.
            nu_slider = gr.Slider(0.01, 0.2, value=0.05, step=0.01, label="SVM nu parameter (controls anomaly threshold)")
            cluster_slider = gr.Slider(2, 10, value=2, step=1, label="Number of KMeans clusters")
            detect_button = gr.Button("Detect Anomalies")
        with gr.Column():
            summary_output = gr.HTML(label="Summary")
    with gr.Row():
        pie_output = gr.Image(label="Anomaly Distribution")
        svm_output = gr.Image(label="SVM Anomaly Detection")
    with gr.Row():
        kmeans_output = gr.Image(label="KMeans Clustering")
    with gr.Row():
        anomalies_output = gr.HTML(label="Detected Anomalies")
    # Wire the button to the analysis function; output order must match
    # the 5-tuple returned by detect_anomalies.
    detect_button.click(
        detect_anomalies,
        inputs=[nu_slider, cluster_slider],
        outputs=[pie_output, kmeans_output, svm_output, summary_output, anomalies_output]
    )
    gr.Markdown("""
    ## How to Use
    1. Adjust the SVM nu parameter (controls anomaly detection sensitivity)
    2. Choose the number of clusters for KMeans
    3. Click 'Detect Anomalies' to analyze the data
    ## Interpretation
    - The pie chart shows the proportion of normal vs anomalous transactions
    - The scatter plots visualize the clusters and anomalies
    - The AI insights provide expert analysis of detected anomalies
    - The table displays detailed information about detected anomalies
    """)
# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    # share=True requests a public gradio.live tunnel URL in addition to
    # the local server.
    demo.launch(share=True)