Spaces:

prernajeet01
/

fraud_detection

Runtime error

App Files Community

prernajeet01 committed on May 28, 2025

Commit

5c504da

verified ·

1 Parent(s): f30a9cc

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -160

app.py CHANGED Viewed

@@ -8,127 +8,19 @@ import plotly.express as px
 import plotly.graph_objects as go
 from sklearn.ensemble import IsolationForest
 from sklearn.preprocessing import StandardScaler
-import openai
 from datetime import datetime, timedelta
 import json
 import tempfile
-# Set OpenAI API key from Hugging Face Spaces secrets
-openai.api_key = os.environ.get("OPENAI_API_KEY")
 def analyze_dataset_structure(df):
-    """Use OpenAI to analyze the dataset structure and identify relevant columns"""
-    if not openai.api_key:
-        return None, "OpenAI API key not found. Please add it to the Hugging Face Spaces secrets."
-    try:
-        # Get basic dataset info
-        sample_data = df.head(3).to_dict(orient='records')
-        column_info = []
-        for col in df.columns:
-            dtype = str(df[col].dtype)
-            unique_values = len(df[col].unique())
-            null_percentage = round((df[col].isna().sum() / len(df)) * 100, 2)
-            sample_values = df[col].dropna().sample(min(3, len(df[col].dropna()))).tolist()
-            column_info.append({
-                "column_name": col,
-                "data_type": dtype,
-                "unique_values_count": unique_values,
-                "null_percentage": null_percentage,
-                "sample_values": str(sample_values)[:100]  # Limit sample length
-            })
-        # Create prompt for OpenAI
-        prompt = f"""
-        Analyze this transaction dataset structure to identify the purpose of each column.
-        Dataset Information:
-        - Number of rows: {len(df)}
-        - Number of columns: {len(df.columns)}
-        Column Information:
-        {json.dumps(column_info, indent=2)}
-        Sample Data:
-        {json.dumps(sample_data, indent=2)}
-        For each column in the dataset, identify its likely purpose in a transaction dataset.
-        Specifically identify:
-        1. Which column is likely the transaction ID or reference number
-        2. Which column represents the transaction amount or value
-        3. Which column represents the timestamp or date of the transaction
-        4. Which column represents the user ID, account ID, or customer identifier
-        5. Which column might represent location information
-        6. Which columns might be useful for fraud detection (e.g., IP address, device info, transaction status)
-        Return your analysis as a JSON object with this structure:
-        {
-            "id_column": "column_name",
-            "amount_column": "column_name",
-            "timestamp_column": "column_name",
-            "user_column": "column_name",
-            "location_column": "column_name",
-            "fraud_indicator_columns": ["column1", "column2"],
-            "column_descriptions": {
-                "column_name": "description of purpose"
-            }
-        }
-        Include only columns that you're reasonably confident about, and use null for any category where you can't identify a matching column.
-        """
-        # Create an OpenAI client with the API key
-        client = openai.OpenAI(api_key=openai.api_key)
-        # Call OpenAI API
-        response = client.chat.completions.create(
-            model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": "You are a data analysis expert specializing in financial transaction data structures."},
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=1000,
-            response_format={"type": "json_object"}
-        )
-        # Parse the JSON response
-        structure_analysis = json.loads(response.choices[0].message.content)
-        # Also get a natural language explanation
-        explanation_prompt = f"""
-        Based on your analysis of the dataset structure, provide a brief natural language explanation of:
-        1. What kind of transactions this dataset appears to contain
-        2. What the key columns are and what they represent
-        3. What approach would be best for detecting anomalies or fraud in this specific dataset
-        Keep your explanation concise and focused on the unique characteristics of this dataset.
-        """
-        explanation_response = client.chat.completions.create(
-            model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": "You are a data analysis expert specializing in financial transaction data structures."},
-                {"role": "user", "content": prompt},
-                {"role": "assistant", "content": response.choices[0].message.content},
-                {"role": "user", "content": explanation_prompt}
-            ],
-            max_tokens=500
-        )
-        explanation = explanation_response.choices[0].message.content
-        return structure_analysis, explanation
-    except Exception as e:
-        return None, f"Error analyzing dataset structure: {str(e)}"
-def analyze_dataset_structure(df):
-    """Use OpenAI to analyze the dataset structure and identify relevant columns"""
-    if not openai.api_key:
-        return None, "OpenAI API key not found. Please add it to the Hugging Face Spaces secrets."
     try:
         # Get basic dataset info
@@ -169,7 +61,7 @@ def analyze_dataset_structure(df):
                 "sample_values": sample_values_str
             })
-        # Create prompt for OpenAI
         prompt = f"""
         Analyze this transaction dataset structure to identify the purpose of each column.
@@ -209,22 +101,25 @@ def analyze_dataset_structure(df):
         Include only columns that you're reasonably confident about, and use null for any category where you can't identify a matching column.
         """
-        # Create an OpenAI client with the API key
-        client = openai.OpenAI(api_key=openai.api_key)
-        # Call OpenAI API
-        response = client.chat.completions.create(
-            model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": "You are a data analysis expert specializing in financial transaction data structures."},
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=1000,
-            response_format={"type": "json_object"}
-        )
         # Parse the JSON response
-        structure_analysis = json.loads(response.choices[0].message.content)
         # Also get a natural language explanation
         explanation_prompt = f"""
@@ -236,18 +131,8 @@ def analyze_dataset_structure(df):
         Keep your explanation concise and focused on the unique characteristics of this dataset.
         """
-        explanation_response = client.chat.completions.create(
-            model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": "You are a data analysis expert specializing in financial transaction data structures."},
-                {"role": "user", "content": prompt},
-                {"role": "assistant", "content": response.choices[0].message.content},
-                {"role": "user", "content": explanation_prompt}
-            ],
-            max_tokens=500
-        )
-        explanation = explanation_response.choices[0].message.content
         return structure_analysis, explanation
@@ -519,12 +404,13 @@ def create_visualizations(df, column_mapping):
     return visualizations
 def analyze_transaction_with_ai(transaction_data, suspicious_transactions, column_mapping):
-    """Use OpenAI to analyze suspicious transactions and provide insights"""
-    if not openai.api_key:
-        return "OpenAI API key not found. Please add it to the Hugging Face Spaces secrets."
     try:
-        # Prepare information for OpenAI, converting to a JSON-serializable format
         suspicious_sample = suspicious_transactions.head(5).copy()
         # Convert any datetime columns to string format to make it JSON serializable
@@ -556,7 +442,7 @@ def analyze_transaction_with_ai(transaction_data, suspicious_transactions, colum
                 "suspicious_avg_amount": float(round(suspicious_transactions[amount_col].mean(), 2))
             })
-        # Create prompt for OpenAI
         prompt = f"""
         Analyze these potentially fraudulent transactions and identify patterns or anomalies:
@@ -575,21 +461,14 @@ def analyze_transaction_with_ai(transaction_data, suspicious_transactions, colum
         3. Recommended next steps for investigation
         """
-        # Create an OpenAI client with the API key
-        client = openai.OpenAI(api_key=openai.api_key)
-        # Call OpenAI API
-        response = client.chat.completions.create(
-            model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": "You are a fraud detection expert helping analyze suspicious financial transactions."},
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=800
-        )
         # Return the AI analysis
-        return response.choices[0].message.content
     except Exception as e:
         import traceback

 import plotly.graph_objects as go
 from sklearn.ensemble import IsolationForest
 from sklearn.preprocessing import StandardScaler
+import google.generativeai as genai
 from datetime import datetime, timedelta
 import json
 import tempfile
+# Set Gemini API key from Hugging Face Spaces secrets
+genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
 def analyze_dataset_structure(df):
+    """Use Gemini to analyze the dataset structure and identify relevant columns"""
+    api_key = os.environ.get("GEMINI_API_KEY")
+    if not api_key:
+        return None, "Gemini API key not found. Please add it to the Hugging Face Spaces secrets."
     try:
         # Get basic dataset info
                 "sample_values": sample_values_str
             })
+        # Create prompt for Gemini
         prompt = f"""
         Analyze this transaction dataset structure to identify the purpose of each column.
         Include only columns that you're reasonably confident about, and use null for any category where you can't identify a matching column.
         """
+        # Create Gemini model
+        model = genai.GenerativeModel('gemini-pro')
+        # Call Gemini API
+        response = model.generate_content(prompt)
         # Parse the JSON response
+        response_text = response.text
+        # Extract JSON from response if it's wrapped in markdown code blocks
+        if "```json" in response_text:
+            json_start = response_text.find("```json") + 7
+            json_end = response_text.find("```", json_start)
+            response_text = response_text[json_start:json_end].strip()
+        elif "```" in response_text:
+            json_start = response_text.find("```") + 3
+            json_end = response_text.find("```", json_start)
+            response_text = response_text[json_start:json_end].strip()
+        structure_analysis = json.loads(response_text)
         # Also get a natural language explanation
         explanation_prompt = f"""
         Keep your explanation concise and focused on the unique characteristics of this dataset.
         """
+        explanation_response = model.generate_content(explanation_prompt)
+        explanation = explanation_response.text
         return structure_analysis, explanation
     return visualizations
 def analyze_transaction_with_ai(transaction_data, suspicious_transactions, column_mapping):
+    """Use Gemini to analyze suspicious transactions and provide insights"""
+    api_key = os.environ.get("GEMINI_API_KEY")
+    if not api_key:
+        return "Gemini API key not found. Please add it to the Hugging Face Spaces secrets."
     try:
+        # Prepare information for Gemini, converting to a JSON-serializable format
         suspicious_sample = suspicious_transactions.head(5).copy()
         # Convert any datetime columns to string format to make it JSON serializable
                 "suspicious_avg_amount": float(round(suspicious_transactions[amount_col].mean(), 2))
             })
+        # Create prompt for Gemini
         prompt = f"""
         Analyze these potentially fraudulent transactions and identify patterns or anomalies:
         3. Recommended next steps for investigation
         """
+        # Create Gemini model
+        model = genai.GenerativeModel('gemini-pro')
+        # Call Gemini API
+        response = model.generate_content(prompt)
         # Return the AI analysis
+        return response.text
     except Exception as e:
         import traceback