Spaces:

AITestingWorkSpace
/

FraudNLP

Paused

App Files Files Community

vishalsh13 committed on Feb 2, 2025

Commit

a6b2f62

1 Parent(s): 0ab515f

initial commit

Browse files

Files changed (2) hide show

app.py +173 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import gradio as gr
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import re
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.preprocessing import LabelEncoder
+from fuzzywuzzy import process
+# Enhanced data generation with realistic fraud patterns
+def load_data():
+    np.random.seed(42)
+    cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
+    age_groups = ['18-25', '26-35', '36-45', '46-55', '56+']
+    incomes = ['Low', 'Medium', 'High']
+    data = pd.DataFrame({
+        'TransactionID': range(1, 1001),
+        'Amount': np.random.uniform(10, 15000, 1000).round(2),
+        'Type': np.random.choice(['Credit', 'Debit'], 1000),
+        'City': np.random.choice(cities, 1000),
+        'Age': np.random.randint(18, 70, 1000),
+        'Income': np.random.choice(incomes, 1000, p=[0.4, 0.4, 0.2])
+    })
+    # Create realistic fraud patterns
+    data['Fraud'] = 0
+    data.loc[
+        ((data['Amount'] > 5000) & (data['Income'] == 'Low')) |
+        ((data['Type'] == 'Credit') & (data['Amount'] > 8000)) |
+        ((data['City'] == 'New York') & (data['Age'].between(20, 35)) & (data['Amount'] > 6000)),
+        'Fraud'
+    ] = 1
+    return data
+data = load_data()
+# Preprocessing
+le = LabelEncoder()
+data['Type_encoded'] = le.fit_transform(data['Type'])
+data['City_encoded'] = le.fit_transform(data['City'])
+data['Income_encoded'] = le.fit_transform(data['Income'])
+# Train model
+features = ['Amount', 'Type_encoded', 'City_encoded', 'Age', 'Income_encoded']
+X = data[features]
+y = data['Fraud']
+model = RandomForestClassifier(random_state=42, n_estimators=100)
+model.fit(X, y)
+# Enhanced NLP processing with fuzzy matching
+def process_nl_query(query):
+    try:
+        # Extract amount
+        amount_match = re.search(r'\$?(\d+(?:,\d{3})*(?:\.\d{2})?)', query)
+        if amount_match:
+            amount = float(amount_match.group(1).replace(',', ''))
+        else:
+            return "Error: Could not extract transaction amount. Please specify the amount clearly."
+        # Extract transaction type
+        trans_type = 'Credit' if 'credit' in query.lower() else 'Debit'
+        # Fuzzy match city
+        cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
+        city_match = process.extractOne(query, cities)
+        city = city_match[0] if city_match[1] > 70 else None
+        # Extract age
+        age_match = re.search(r'(\d+)\s*(?:years?|yrs?)?(?:\s*old)?', query)
+        if age_match:
+            age = int(age_match.group(1))
+        else:
+            return "Error: Could not extract age. Please specify the age clearly."
+        # Extract income level
+        income = 'Low' if 'low' in query.lower() else \
+                 'High' if 'high' in query.lower() else 'Medium'
+        # Prepare input
+        input_df = pd.DataFrame({
+            'Amount': [amount],
+            'Type_encoded': le.transform([trans_type])[0],
+            'City_encoded': le.transform([city])[0] if city else -1,
+            'Age': [age],
+            'Income_encoded': le.transform([income])[0]
+        })
+        # Predict
+        proba = model.predict_proba(input_df)[0][1]
+        prediction = model.predict(input_df)[0]
+        # Generate explanation
+        explanation = []
+        if amount > 5000 and income == 'Low':
+            explanation.append("High amount for low income")
+        if amount > 8000 and trans_type == 'Credit':
+            explanation.append("Unusually large credit transaction")
+        if city == 'New York' and 20 <= age <= 35 and amount > 6000:
+            explanation.append("Suspicious pattern for young adults in NYC")
+        return (
+            f"Transaction Details:\n"
+            f"- Amount: ${amount:,.2f}\n"
+            f"- Type: {trans_type}\n"
+            f"- City: {city if city else 'Unknown'}\n"
+            f"- Age: {age}\n"
+            f"- Income Level: {income}\n\n"
+            f"Fraud Analysis:\n"
+            f"- Prediction: {'Potentially Fraudulent' if prediction else 'Likely Legitimate'}\n"
+            f"- Confidence: {proba*100:.1f}%\n"
+            f"- Risk Factors: {', '.join(explanation) if explanation else 'No specific risk factors identified'}"
+        )
+    except Exception as e:
+        return f"Error processing query: {str(e)}. Please provide clear details including amount, type, city, age, and income level."
+# Plotting functions
+def plot_fraud_by_city():
+    plt.figure(figsize=(10, 6))
+    sns.countplot(data=data[data['Fraud'] == 1], x='City')
+    plt.title('Fraud Cases by City')
+    plt.xlabel('City')
+    plt.ylabel('Number of Fraud Cases')
+    return plt
+def plot_fraud_by_income():
+    plt.figure(figsize=(10, 6))
+    sns.countplot(data=data[data['Fraud'] == 1], x='Income')
+    plt.title('Fraud Cases by Income Level')
+    plt.xlabel('Income Level')
+    plt.ylabel('Number of Fraud Cases')
+    return plt
+def plot_amount_vs_age():
+    plt.figure(figsize=(10, 6))
+    sns.scatterplot(data=data, x='Amount', y='Age', hue='Fraud')
+    plt.title('Transaction Amount vs Age (Fraud Highlighted)')
+    plt.xlabel('Transaction Amount')
+    plt.ylabel('Age')
+    return plt
+# Gradio Interface
+with gr.Blocks() as demo:
+    gr.Markdown("## Natural Language Fraud Detection System")
+    with gr.Tab("Natural Language Query"):
+        gr.Markdown("**Example:** 'I saw a credit transaction of $6000 in New York for a 26-year-old client with low income. Is this suspicious?'")
+        nl_input = gr.Textbox(label="Enter your transaction query:")
+        nl_output = gr.Textbox(label="Fraud Analysis", lines=10)
+        gr.Examples(
+            examples=[
+                "Is a $8000 credit transaction in Chicago for a 45-year-old with medium income suspicious?",
+                "Check a debit of $300 in Phoenix for a 60-year-old high income client",
+                "A $12,000 credit transaction occurred in Los Angeles for a 30-year-old with low income. Should I be concerned?",
+                "Verify a $5,500 debit in New York by a 22-year-old medium income individual"
+            ],
+            inputs=nl_input
+        )
+        nl_input.submit(fn=process_nl_query, inputs=nl_input, outputs=nl_output)
+    with gr.Tab("Data Insights"):
+        gr.Markdown("### Fraud Pattern Analysis")
+        gr.DataFrame(data[data['Fraud'] == 1].describe())
+        with gr.Row():
+            gr.Plot(plot_fraud_by_city)
+            gr.Plot(plot_fraud_by_income)
+        gr.Plot(plot_amount_vs_age)
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio
+pandas
+numpy
+scikit-learn
+matplotlib
+seaborn
+fuzzywuzzy
+python-Levenshtein