import gradio as gr import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import numpy as np from sklearn.svm import OneClassSVM from sklearn.cluster import KMeans from sklearn.preprocessing import scale import matplotlib matplotlib.use('Agg') import io import base64 from PIL import Image import os from openai import OpenAI # Path to the CSV file in the environment CSV_PATH = 'FI_Transactions.csv' # Get OpenAI API key from environment variables OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") client = OpenAI(api_key=OPENAI_API_KEY) def detect_anomalies(nu_value, n_clusters): # Read the CSV file from the environment df = pd.read_csv(CSV_PATH) # Data preprocessing and scaling features = df.select_dtypes(include=[np.number]) feature_names = features.columns.tolist() scaled_features = scale(features) # Train One-Class SVM for anomaly detection svm_model = OneClassSVM(kernel='rbf', nu=nu_value, gamma='scale') svm_model.fit(scaled_features) # Predict anomalies svm_preds = svm_model.predict(scaled_features) df['SVM_Anomaly'] = ['Anomaly' if x == -1 else 'Normal' for x in svm_preds] # Count anomalies anomaly_count = (df['SVM_Anomaly'] == 'Anomaly').sum() normal_count = (df['SVM_Anomaly'] == 'Normal').sum() # Train KMeans for clustering kmeans = KMeans(n_clusters=n_clusters, random_state=42) kmeans.fit(scaled_features) df['KMeans_Cluster'] = kmeans.labels_ # Create visualizations # 1. Anomalies count pie chart plt.figure(figsize=(8, 6)) plt.pie([anomaly_count, normal_count], labels=['Anomalies', 'Normal'], autopct='%1.1f%%', colors=['#FF9999', '#66B2FF']) plt.title('SVM Anomaly Detection Results') pie_chart_img = plt_to_img() # 2. KMeans clustering scatter plot (using first two features) plt.figure(figsize=(10, 6)) x_feature = 0 if len(feature_names) > 0 else 0 y_feature = 1 if len(feature_names) > 1 else 0 scatter = plt.scatter(scaled_features[:, x_feature], scaled_features[:, y_feature], c=kmeans.labels_, cmap='viridis', alpha=0.7) plt.colorbar(scatter, label='Cluster') plt.title('KMeans Clustering Results') plt.xlabel(feature_names[x_feature] if len(feature_names) > x_feature else "Feature 1") plt.ylabel(feature_names[y_feature] if len(feature_names) > y_feature else "Feature 2") kmeans_img = plt_to_img() # 3. SVM anomalies scatter plot plt.figure(figsize=(10, 6)) colors = ['red' if x == 'Anomaly' else 'blue' for x in df['SVM_Anomaly']] plt.scatter(scaled_features[:, x_feature], scaled_features[:, y_feature], c=colors, alpha=0.7) plt.title('SVM Anomaly Detection') plt.xlabel(feature_names[x_feature] if len(feature_names) > x_feature else "Feature 1") plt.ylabel(feature_names[y_feature] if len(feature_names) > y_feature else "Feature 2") plt.legend(['Anomaly', 'Normal']) svm_img = plt_to_img() # Create summary dataframe of anomalies anomalies_df = df[df['SVM_Anomaly'] == 'Anomaly'].reset_index() # Get AI insights about anomalies using OpenAI API ai_insights = get_ai_insights(df, anomalies_df) # Convert the dataframe to HTML for display anomalies_html = anomalies_df.to_html(classes='table table-striped') # Create HTML summary summary_html = f"""
Total transactions: {len(df)}
Anomalies detected: {anomaly_count} ({anomaly_count/len(df)*100:.2f}%)
Normal transactions: {normal_count} ({normal_count/len(df)*100:.2f}%)
{ai_insights}
""" return pie_chart_img, kmeans_img, svm_img, summary_html, anomalies_html def get_ai_insights(df, anomalies_df): """Get insights about the anomalies using OpenAI API""" try: if not OPENAI_API_KEY: return "OpenAI API key not found in environment variables. AI insights are unavailable." # Prepare information about the dataset and anomalies df_info = df.describe().to_string() anomaly_info = anomalies_df.head(5).to_string() if not anomalies_df.empty else "No anomalies detected" # Create a prompt for the OpenAI API prompt = f""" Analyze the following financial transaction data and detected anomalies: Dataset Statistics: {df_info} Sample Anomalies (top 5): {anomaly_info} Please provide: 1. Possible patterns or reasons for these anomalies 2. Recommendations for further investigation 3. Potential risk factors these anomalies might indicate Keep your analysis concise and focused on financial fraud detection. """ # Call the OpenAI API using the new client format response = client.chat.completions.create( model="gpt-3.5-turbo", messages=[ {"role": "system", "content": "You are a financial fraud detection expert."}, {"role": "user", "content": prompt} ], max_tokens=500 ) # Extract and return the insights return response.choices[0].message.content except Exception as e: return f"Could not generate AI insights. Error: {str(e)}" def plt_to_img(): buf = io.BytesIO() plt.savefig(buf, format='png') buf.seek(0) img = Image.open(buf) return img # Create the Gradio interface with gr.Blocks(title="Financial Transaction Anomaly Detection") as demo: gr.Markdown("# Financial Transaction Anomaly Detection") gr.Markdown(f"Analyzing data from {CSV_PATH}") with gr.Row(): with gr.Column(): nu_slider = gr.Slider(0.01, 0.2, value=0.05, step=0.01, label="SVM nu parameter (controls anomaly threshold)") cluster_slider = gr.Slider(2, 10, value=2, step=1, label="Number of KMeans clusters") detect_button = gr.Button("Detect Anomalies") with gr.Column(): summary_output = gr.HTML(label="Summary") with gr.Row(): pie_output = gr.Image(label="Anomaly Distribution") svm_output = gr.Image(label="SVM Anomaly Detection") with gr.Row(): kmeans_output = gr.Image(label="KMeans Clustering") with gr.Row(): anomalies_output = gr.HTML(label="Detected Anomalies") detect_button.click( detect_anomalies, inputs=[nu_slider, cluster_slider], outputs=[pie_output, kmeans_output, svm_output, summary_output, anomalies_output] ) gr.Markdown(""" ## How to Use 1. Adjust the SVM nu parameter (controls anomaly detection sensitivity) 2. Choose the number of clusters for KMeans 3. Click 'Detect Anomalies' to analyze the data ## Interpretation - The pie chart shows the proportion of normal vs anomalous transactions - The scatter plots visualize the clusters and anomalies - The AI insights provide expert analysis of detected anomalies - The table displays detailed information about detected anomalies """) # Launch the app if __name__ == "__main__": demo.launch(share=True)