prernajeet01 committed on
Commit
69cbf5f
·
verified ·
1 Parent(s): 05d9f3e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +204 -0
app.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import numpy as np
6
+ from sklearn.svm import OneClassSVM
7
+ from sklearn.cluster import KMeans
8
+ from sklearn.preprocessing import scale
9
+ import matplotlib
10
+ matplotlib.use('Agg')
11
+ import io
12
+ import base64
13
+ from PIL import Image
14
+ import os
15
+ import openai
16
+
17
# Path to the transactions CSV. It is a relative path, so it is resolved
# against the working directory the app is started from, and it is re-read
# by detect_anomalies() on every run.
CSV_PATH = 'FI_Transactions.csv'
19
+
20
def detect_anomalies(api_key, nu_value, n_clusters):
    """Run One-Class SVM anomaly detection and KMeans clustering on the CSV.

    Reads ``CSV_PATH``, standardises its numeric columns, flags anomalies
    with a One-Class SVM, clusters the rows with KMeans, renders three
    charts and builds an HTML summary that includes AI-generated insights.

    Args:
        api_key: OpenAI API key used for the insights request.
        nu_value: ``nu`` parameter of the One-Class SVM.
        n_clusters: Number of KMeans clusters; coerced to ``int`` because UI
            sliders may deliver a float.

    Returns:
        A 5-tuple ``(pie_chart_img, kmeans_img, svm_img, summary_html,
        anomalies_html)`` in the order the Gradio outputs expect.

    Raises:
        gr.Error: If the CSV contains no numeric data to analyse.
    """
    import html  # local import: only needed to escape the AI text below

    # NOTE(review): both assignments are process-wide, so on a shared
    # deployment the most recently submitted key is used for every session.
    os.environ["OPENAI_API_KEY"] = api_key
    openai.api_key = api_key

    df = pd.read_csv(CSV_PATH)

    # Only numeric columns can be scaled and fed to the models.
    features = df.select_dtypes(include=[np.number])
    feature_names = features.columns.tolist()
    if features.empty:
        raise gr.Error(f"{CSV_PATH} contains no numeric data to analyse.")
    scaled_features = scale(features)

    # One-Class SVM: predict() returns -1 for outliers and +1 for inliers.
    svm_model = OneClassSVM(kernel='rbf', nu=nu_value, gamma='scale')
    svm_model.fit(scaled_features)
    is_anomaly = svm_model.predict(scaled_features) == -1
    df['SVM_Anomaly'] = np.where(is_anomaly, 'Anomaly', 'Normal')

    total = len(df)
    anomaly_count = int(is_anomaly.sum())
    normal_count = total - anomaly_count

    # KMeans requires an integral cluster count.
    kmeans = KMeans(n_clusters=int(n_clusters), random_state=42)
    kmeans.fit(scaled_features)
    df['KMeans_Cluster'] = kmeans.labels_

    # Scatter plots use the first two numeric columns; with a single numeric
    # column the same column is used on both axes.
    x_feature = 0
    y_feature = 1 if len(feature_names) > 1 else 0
    x_label = feature_names[x_feature]
    y_label = feature_names[y_feature]

    # Every figure is closed explicitly once rendered: pyplot keeps figures
    # alive until closed, so they would otherwise pile up on each click.

    # 1. Anomalies count pie chart
    pie_fig = plt.figure(figsize=(8, 6))
    plt.pie(
        [anomaly_count, normal_count],
        labels=['Anomalies', 'Normal'],
        autopct='%1.1f%%',
        colors=['#FF9999', '#66B2FF'],
    )
    plt.title('SVM Anomaly Detection Results')
    pie_chart_img = plt_to_img()
    plt.close(pie_fig)

    # 2. KMeans clustering scatter plot
    kmeans_fig = plt.figure(figsize=(10, 6))
    scatter = plt.scatter(
        scaled_features[:, x_feature],
        scaled_features[:, y_feature],
        c=kmeans.labels_,
        cmap='viridis',
        alpha=0.7,
    )
    plt.colorbar(scatter, label='Cluster')
    plt.title('KMeans Clustering Results')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    kmeans_img = plt_to_img()
    plt.close(kmeans_fig)

    # 3. SVM anomalies scatter plot. Each class is drawn as its own labelled
    # series so the legend maps colours correctly; anomalies are drawn last
    # so they are not hidden under the normal points.
    svm_fig = plt.figure(figsize=(10, 6))
    for label, colour, mask in (
        ('Normal', 'blue', ~is_anomaly),
        ('Anomaly', 'red', is_anomaly),
    ):
        plt.scatter(
            scaled_features[mask, x_feature],
            scaled_features[mask, y_feature],
            c=colour,
            alpha=0.7,
            label=label,
        )
    plt.title('SVM Anomaly Detection')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()
    svm_img = plt_to_img()
    plt.close(svm_fig)

    # Table of flagged rows; reset_index() keeps the original row number as
    # an "index" column.
    anomalies_df = df[df['SVM_Anomaly'] == 'Anomaly'].reset_index()

    ai_insights = get_ai_insights(df, anomalies_df)
    # The insights (or the error text) come from outside this app, so escape
    # them before embedding in HTML and keep their line breaks visible.
    insights_html = html.escape(str(ai_insights)).replace("\n", "<br>")

    # DataFrame.to_html escapes cell values by default.
    anomalies_html = anomalies_df.to_html(classes='table table-striped')

    summary_html = f"""
    <h3>Analysis Summary</h3>
    <p>Total transactions: {total}</p>
    <p>Anomalies detected: {anomaly_count} ({anomaly_count/total*100:.2f}%)</p>
    <p>Normal transactions: {normal_count} ({normal_count/total*100:.2f}%)</p>

    <h3>AI Insights</h3>
    <p>{insights_html}</p>
    """

    return pie_chart_img, kmeans_img, svm_img, summary_html, anomalies_html
108
+
109
def get_ai_insights(df, anomalies_df, model="gpt-3.5-turbo", max_tokens=500):
    """Ask an OpenAI chat model to comment on the detected anomalies.

    This is best-effort: any failure (missing or invalid key, network error,
    unexpected response) is caught and reported in the returned string rather
    than raised, so the rest of the analysis can still be shown.

    Args:
        df: Full transactions DataFrame; only ``df.describe()`` is sent.
        anomalies_df: Rows flagged as anomalies; only the first five are sent.
        model: Name of the chat model to query.
        max_tokens: Upper bound on the length of the reply.

    Returns:
        The model's reply text, or a message starting with
        "Could not generate AI insights." when the request fails.
    """
    try:
        # Summarise the data instead of sending every row.
        df_info = df.describe().to_string()
        if anomalies_df.empty:
            anomaly_info = "No anomalies detected"
        else:
            anomaly_info = anomalies_df.head(5).to_string()

        # Assembled line by line so no source-code indentation ends up in
        # the text sent to the model.
        prompt = (
            "Analyze the following financial transaction data and detected anomalies:\n"
            "\n"
            "Dataset Statistics:\n"
            f"{df_info}\n"
            "\n"
            "Sample Anomalies (top 5):\n"
            f"{anomaly_info}\n"
            "\n"
            "Please provide:\n"
            "1. Possible patterns or reasons for these anomalies\n"
            "2. Recommendations for further investigation\n"
            "3. Potential risk factors these anomalies might indicate\n"
            "\n"
            "Keep your analysis concise and focused on financial fraud detection.\n"
        )
        messages = [
            {"role": "system", "content": "You are a financial fraud detection expert."},
            {"role": "user", "content": prompt},
        ]

        if hasattr(openai, "OpenAI"):
            # openai>=1.0 removed openai.ChatCompletion; the module-level
            # client still honours openai.api_key.
            response = openai.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=max_tokens,
            )
        else:
            # Legacy (<1.0) interface.
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                max_tokens=max_tokens,
            )

        return response.choices[0].message.content

    except Exception as e:
        return f"Could not generate AI insights. Please check your API key and try again. Error: {str(e)}"
149
+
150
def plt_to_img():
    """Render the current pyplot figure to a PIL image and close the figure.

    Returns:
        A fully loaded ``PIL.Image.Image`` holding the PNG rendering of the
        figure that was current when the function was called.
    """
    fig = plt.gcf()
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each call would leave one more figure in memory.
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)
    # Image.open is lazy; decode now so the image is complete on return.
    img.load()
    return img
156
+
157
# Create the Gradio interface.
# Components are laid out in the order they are created inside the nested
# Row/Column contexts, so statement order here defines the page layout.
with gr.Blocks(title="Financial Transaction Anomaly Detection") as demo:
    gr.Markdown("# Financial Transaction Anomaly Detection")
    gr.Markdown(f"Analyzing data from {CSV_PATH}")

    with gr.Row():
        # Left column: user inputs and the trigger button.
        with gr.Column():
            api_key = gr.Textbox(type="password", label="OpenAI API Key", placeholder="Enter your OpenAI API key")
            nu_slider = gr.Slider(0.01, 0.2, value=0.05, step=0.01, label="SVM nu parameter (controls anomaly threshold)")
            cluster_slider = gr.Slider(2, 10, value=2, step=1, label="Number of KMeans clusters")
            detect_button = gr.Button("Detect Anomalies")

        # Right column: textual summary (counts plus AI insights).
        with gr.Column():
            summary_output = gr.HTML(label="Summary")

    with gr.Row():
        pie_output = gr.Image(label="Anomaly Distribution")
        svm_output = gr.Image(label="SVM Anomaly Detection")

    with gr.Row():
        kmeans_output = gr.Image(label="KMeans Clustering")

    with gr.Row():
        anomalies_output = gr.HTML(label="Detected Anomalies")

    # The outputs list must stay in the same order as the tuple returned by
    # detect_anomalies: pie, kmeans, svm, summary, anomalies table.
    detect_button.click(
        detect_anomalies,
        inputs=[api_key, nu_slider, cluster_slider],
        outputs=[pie_output, kmeans_output, svm_output, summary_output, anomalies_output]
    )

    gr.Markdown("""
    ## How to Use
    1. Enter your OpenAI API key for AI-powered insights
    2. Adjust the SVM nu parameter (controls anomaly detection sensitivity)
    3. Choose the number of clusters for KMeans
    4. Click 'Detect Anomalies' to analyze the data

    ## Interpretation
    - The pie chart shows the proportion of normal vs anomalous transactions
    - The scatter plots visualize the clusters and anomalies
    - The AI insights provide expert analysis of detected anomalies
    - The table displays detailed information about detected anomalies
    """)

# Launch the app only when this file is run as a script.
# NOTE(review): share=True presumably requests a public share link; confirm
# that is wanted for an app that accepts API keys.
if __name__ == "__main__":
    demo.launch(share=True)