Spaces:

AITestingWorkSpace
/

FraudNLP

Paused

App Files Files Community

vishalsh13 committed on Feb 2, 2025

Commit

adec62c

1 Parent(s): a6b2f62

code update

Browse files

Files changed (1) hide show

app.py +26 -55

app.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import gradio as gr
 import pandas as pd
 import numpy as np
-import seaborn as sns
-import matplotlib.pyplot as plt
 import re
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.preprocessing import LabelEncoder
@@ -37,11 +35,15 @@ def load_data():
 data = load_data()
-# Preprocessing
-le = LabelEncoder()
-data['Type_encoded'] = le.fit_transform(data['Type'])
-data['City_encoded'] = le.fit_transform(data['City'])
-data['Income_encoded'] = le.fit_transform(data['Income'])
 # Train model
 features = ['Amount', 'Type_encoded', 'City_encoded', 'Age', 'Income_encoded']
@@ -51,7 +53,6 @@ y = data['Fraud']
 model = RandomForestClassifier(random_state=42, n_estimators=100)
 model.fit(X, y)
-# Enhanced NLP processing with fuzzy matching
 def process_nl_query(query):
     try:
         # Extract amount
@@ -59,7 +60,7 @@ def process_nl_query(query):
         if amount_match:
             amount = float(amount_match.group(1).replace(',', ''))
         else:
-            return "Error: Could not extract transaction amount. Please specify the amount clearly."
         # Extract transaction type
         trans_type = 'Credit' if 'credit' in query.lower() else 'Debit'
@@ -67,26 +68,27 @@ def process_nl_query(query):
         # Fuzzy match city
         cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
         city_match = process.extractOne(query, cities)
-        city = city_match[0] if city_match[1] > 70 else None
         # Extract age
         age_match = re.search(r'(\d+)\s*(?:years?|yrs?)?(?:\s*old)?', query)
-        if age_match:
-            age = int(age_match.group(1))
-        else:
-            return "Error: Could not extract age. Please specify the age clearly."
         # Extract income level
         income = 'Low' if 'low' in query.lower() else \
                  'High' if 'high' in query.lower() else 'Medium'
         # Prepare input
         input_df = pd.DataFrame({
             'Amount': [amount],
-            'Type_encoded': le.transform([trans_type])[0],
-            'City_encoded': le.transform([city])[0] if city else -1,
-            'Age': [age],
-            'Income_encoded': le.transform([income])[0]
         })
         # Predict
@@ -106,7 +108,7 @@ def process_nl_query(query):
             f"Transaction Details:\n"
             f"- Amount: ${amount:,.2f}\n"
             f"- Type: {trans_type}\n"
-            f"- City: {city if city else 'Unknown'}\n"
             f"- Age: {age}\n"
             f"- Income Level: {income}\n\n"
             f"Fraud Analysis:\n"
@@ -116,47 +118,20 @@ def process_nl_query(query):
         )
     except Exception as e:
-        return f"Error processing query: {str(e)}. Please provide clear details including amount, type, city, age, and income level."
-# Plotting functions
-def plot_fraud_by_city():
-    plt.figure(figsize=(10, 6))
-    sns.countplot(data=data[data['Fraud'] == 1], x='City')
-    plt.title('Fraud Cases by City')
-    plt.xlabel('City')
-    plt.ylabel('Number of Fraud Cases')
-    return plt
-def plot_fraud_by_income():
-    plt.figure(figsize=(10, 6))
-    sns.countplot(data=data[data['Fraud'] == 1], x='Income')
-    plt.title('Fraud Cases by Income Level')
-    plt.xlabel('Income Level')
-    plt.ylabel('Number of Fraud Cases')
-    return plt
-def plot_amount_vs_age():
-    plt.figure(figsize=(10, 6))
-    sns.scatterplot(data=data, x='Amount', y='Age', hue='Fraud')
-    plt.title('Transaction Amount vs Age (Fraud Highlighted)')
-    plt.xlabel('Transaction Amount')
-    plt.ylabel('Age')
-    return plt
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("## Natural Language Fraud Detection System")
     with gr.Tab("Natural Language Query"):
-        gr.Markdown("**Example:** 'I saw a credit transaction of $6000 in New York for a 26-year-old client with low income. Is this suspicious?'")
         nl_input = gr.Textbox(label="Enter your transaction query:")
         nl_output = gr.Textbox(label="Fraud Analysis", lines=10)
         gr.Examples(
             examples=[
-                "Is a $8000 credit transaction in Chicago for a 45-year-old with medium income suspicious?",
-                "Check a debit of $300 in Phoenix for a 60-year-old high income client",
-                "A $12,000 credit transaction occurred in Los Angeles for a 30-year-old with low income. Should I be concerned?",
-                "Verify a $5,500 debit in New York by a 22-year-old medium income individual"
             ],
             inputs=nl_input
         )
@@ -165,9 +140,5 @@ with gr.Blocks() as demo:
     with gr.Tab("Data Insights"):
         gr.Markdown("### Fraud Pattern Analysis")
         gr.DataFrame(data[data['Fraud'] == 1].describe())
-        with gr.Row():
-            gr.Plot(plot_fraud_by_city)
-            gr.Plot(plot_fraud_by_income)
-        gr.Plot(plot_amount_vs_age)
 demo.launch()

 import gradio as gr
 import pandas as pd
 import numpy as np
 import re
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.preprocessing import LabelEncoder
 data = load_data()
+# Initialize separate encoders for each feature
+le_type = LabelEncoder()
+le_city = LabelEncoder()
+le_income = LabelEncoder()
+# Fit encoders on full dataset (or training data in real scenarios)
+data['Type_encoded'] = le_type.fit_transform(data['Type'])
+data['City_encoded'] = le_city.fit_transform(data['City'])
+data['Income_encoded'] = le_income.fit_transform(data['Income'])
 # Train model
 features = ['Amount', 'Type_encoded', 'City_encoded', 'Age', 'Income_encoded']
 model = RandomForestClassifier(random_state=42, n_estimators=100)
 model.fit(X, y)
 def process_nl_query(query):
     try:
         # Extract amount
         if amount_match:
             amount = float(amount_match.group(1).replace(',', ''))
         else:
+            return "Error: Could not extract transaction amount."
         # Extract transaction type
         trans_type = 'Credit' if 'credit' in query.lower() else 'Debit'
         # Fuzzy match city
         cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
         city_match = process.extractOne(query, cities)
+        city = city_match[0] if city_match[1] > 70 else 'Unknown'
         # Extract age
         age_match = re.search(r'(\d+)\s*(?:years?|yrs?)?(?:\s*old)?', query)
+        age = int(age_match.group(1)) if age_match else None
         # Extract income level
         income = 'Low' if 'low' in query.lower() else \
                  'High' if 'high' in query.lower() else 'Medium'
+        # Handle unseen labels
+        city_encoded = le_city.transform([city])[0] if city in le_city.classes_ else -1
+        income_encoded = le_income.transform([income])[0] if income in le_income.classes_ else -1
         # Prepare input
         input_df = pd.DataFrame({
             'Amount': [amount],
+            'Type_encoded': le_type.transform([trans_type])[0],
+            'City_encoded': city_encoded,
+            'Age': [age] if age else data['Age'].median(),  # Handle missing age
+            'Income_encoded': income_encoded
         })
         # Predict
             f"Transaction Details:\n"
             f"- Amount: ${amount:,.2f}\n"
             f"- Type: {trans_type}\n"
+            f"- City: {city}\n"
             f"- Age: {age}\n"
             f"- Income Level: {income}\n\n"
             f"Fraud Analysis:\n"
         )
     except Exception as e:
+        return f"Error processing query: {str(e)}"
 # Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("## Enhanced Fraud Detection System")
     with gr.Tab("Natural Language Query"):
+        gr.Markdown("**Example:** 'Check a $6000 credit in New York for a 26-year-old with low income'")
         nl_input = gr.Textbox(label="Enter your transaction query:")
         nl_output = gr.Textbox(label="Fraud Analysis", lines=10)
         gr.Examples(
             examples=[
+                "Is a $8000 credit in Chicago for a 45-year-old medium income safe?",
+                "Verify a $300 debit in Phoenix for a 60-year-old high income client"
             ],
             inputs=nl_input
         )
     with gr.Tab("Data Insights"):
         gr.Markdown("### Fraud Pattern Analysis")
         gr.DataFrame(data[data['Fraud'] == 1].describe())
 demo.launch()