prernajeet01 committed on
Commit
5c504da
·
verified ·
1 Parent(s): f30a9cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -160
app.py CHANGED
@@ -8,127 +8,19 @@ import plotly.express as px
8
  import plotly.graph_objects as go
9
  from sklearn.ensemble import IsolationForest
10
  from sklearn.preprocessing import StandardScaler
11
- import openai
12
  from datetime import datetime, timedelta
13
  import json
14
  import tempfile
15
 
16
- # Set OpenAI API key from Hugging Face Spaces secrets
17
- openai.api_key = os.environ.get("OPENAI_API_KEY")
18
 
19
  def analyze_dataset_structure(df):
20
- """Use OpenAI to analyze the dataset structure and identify relevant columns"""
21
- if not openai.api_key:
22
- return None, "OpenAI API key not found. Please add it to the Hugging Face Spaces secrets."
23
-
24
- try:
25
- # Get basic dataset info
26
- sample_data = df.head(3).to_dict(orient='records')
27
- column_info = []
28
-
29
- for col in df.columns:
30
- dtype = str(df[col].dtype)
31
- unique_values = len(df[col].unique())
32
- null_percentage = round((df[col].isna().sum() / len(df)) * 100, 2)
33
- sample_values = df[col].dropna().sample(min(3, len(df[col].dropna()))).tolist()
34
-
35
- column_info.append({
36
- "column_name": col,
37
- "data_type": dtype,
38
- "unique_values_count": unique_values,
39
- "null_percentage": null_percentage,
40
- "sample_values": str(sample_values)[:100] # Limit sample length
41
- })
42
-
43
- # Create prompt for OpenAI
44
- prompt = f"""
45
- Analyze this transaction dataset structure to identify the purpose of each column.
46
-
47
- Dataset Information:
48
- - Number of rows: {len(df)}
49
- - Number of columns: {len(df.columns)}
50
-
51
- Column Information:
52
- {json.dumps(column_info, indent=2)}
53
-
54
- Sample Data:
55
- {json.dumps(sample_data, indent=2)}
56
-
57
- For each column in the dataset, identify its likely purpose in a transaction dataset.
58
- Specifically identify:
59
-
60
- 1. Which column is likely the transaction ID or reference number
61
- 2. Which column represents the transaction amount or value
62
- 3. Which column represents the timestamp or date of the transaction
63
- 4. Which column represents the user ID, account ID, or customer identifier
64
- 5. Which column might represent location information
65
- 6. Which columns might be useful for fraud detection (e.g., IP address, device info, transaction status)
66
-
67
- Return your analysis as a JSON object with this structure:
68
- {
69
- "id_column": "column_name",
70
- "amount_column": "column_name",
71
- "timestamp_column": "column_name",
72
- "user_column": "column_name",
73
- "location_column": "column_name",
74
- "fraud_indicator_columns": ["column1", "column2"],
75
- "column_descriptions": {
76
- "column_name": "description of purpose"
77
- }
78
- }
79
-
80
- Include only columns that you're reasonably confident about, and use null for any category where you can't identify a matching column.
81
- """
82
-
83
- # Create an OpenAI client with the API key
84
- client = openai.OpenAI(api_key=openai.api_key)
85
-
86
- # Call OpenAI API
87
- response = client.chat.completions.create(
88
- model="gpt-3.5-turbo",
89
- messages=[
90
- {"role": "system", "content": "You are a data analysis expert specializing in financial transaction data structures."},
91
- {"role": "user", "content": prompt}
92
- ],
93
- max_tokens=1000,
94
- response_format={"type": "json_object"}
95
- )
96
-
97
- # Parse the JSON response
98
- structure_analysis = json.loads(response.choices[0].message.content)
99
-
100
- # Also get a natural language explanation
101
- explanation_prompt = f"""
102
- Based on your analysis of the dataset structure, provide a brief natural language explanation of:
103
- 1. What kind of transactions this dataset appears to contain
104
- 2. What the key columns are and what they represent
105
- 3. What approach would be best for detecting anomalies or fraud in this specific dataset
106
-
107
- Keep your explanation concise and focused on the unique characteristics of this dataset.
108
- """
109
-
110
- explanation_response = client.chat.completions.create(
111
- model="gpt-3.5-turbo",
112
- messages=[
113
- {"role": "system", "content": "You are a data analysis expert specializing in financial transaction data structures."},
114
- {"role": "user", "content": prompt},
115
- {"role": "assistant", "content": response.choices[0].message.content},
116
- {"role": "user", "content": explanation_prompt}
117
- ],
118
- max_tokens=500
119
- )
120
-
121
- explanation = explanation_response.choices[0].message.content
122
-
123
- return structure_analysis, explanation
124
-
125
- except Exception as e:
126
- return None, f"Error analyzing dataset structure: {str(e)}"
127
-
128
- def analyze_dataset_structure(df):
129
- """Use OpenAI to analyze the dataset structure and identify relevant columns"""
130
- if not openai.api_key:
131
- return None, "OpenAI API key not found. Please add it to the Hugging Face Spaces secrets."
132
 
133
  try:
134
  # Get basic dataset info
@@ -169,7 +61,7 @@ def analyze_dataset_structure(df):
169
  "sample_values": sample_values_str
170
  })
171
 
172
- # Create prompt for OpenAI
173
  prompt = f"""
174
  Analyze this transaction dataset structure to identify the purpose of each column.
175
 
@@ -209,22 +101,25 @@ def analyze_dataset_structure(df):
209
  Include only columns that you're reasonably confident about, and use null for any category where you can't identify a matching column.
210
  """
211
 
212
- # Create an OpenAI client with the API key
213
- client = openai.OpenAI(api_key=openai.api_key)
214
-
215
- # Call OpenAI API
216
- response = client.chat.completions.create(
217
- model="gpt-3.5-turbo",
218
- messages=[
219
- {"role": "system", "content": "You are a data analysis expert specializing in financial transaction data structures."},
220
- {"role": "user", "content": prompt}
221
- ],
222
- max_tokens=1000,
223
- response_format={"type": "json_object"}
224
- )
225
 
226
  # Parse the JSON response
227
- structure_analysis = json.loads(response.choices[0].message.content)
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  # Also get a natural language explanation
230
  explanation_prompt = f"""
@@ -236,18 +131,8 @@ def analyze_dataset_structure(df):
236
  Keep your explanation concise and focused on the unique characteristics of this dataset.
237
  """
238
 
239
- explanation_response = client.chat.completions.create(
240
- model="gpt-3.5-turbo",
241
- messages=[
242
- {"role": "system", "content": "You are a data analysis expert specializing in financial transaction data structures."},
243
- {"role": "user", "content": prompt},
244
- {"role": "assistant", "content": response.choices[0].message.content},
245
- {"role": "user", "content": explanation_prompt}
246
- ],
247
- max_tokens=500
248
- )
249
-
250
- explanation = explanation_response.choices[0].message.content
251
 
252
  return structure_analysis, explanation
253
 
@@ -519,12 +404,13 @@ def create_visualizations(df, column_mapping):
519
  return visualizations
520
 
521
  def analyze_transaction_with_ai(transaction_data, suspicious_transactions, column_mapping):
522
- """Use OpenAI to analyze suspicious transactions and provide insights"""
523
- if not openai.api_key:
524
- return "OpenAI API key not found. Please add it to the Hugging Face Spaces secrets."
 
525
 
526
  try:
527
- # Prepare information for OpenAI, converting to a JSON-serializable format
528
  suspicious_sample = suspicious_transactions.head(5).copy()
529
 
530
  # Convert any datetime columns to string format to make it JSON serializable
@@ -556,7 +442,7 @@ def analyze_transaction_with_ai(transaction_data, suspicious_transactions, colum
556
  "suspicious_avg_amount": float(round(suspicious_transactions[amount_col].mean(), 2))
557
  })
558
 
559
- # Create prompt for OpenAI
560
  prompt = f"""
561
  Analyze these potentially fraudulent transactions and identify patterns or anomalies:
562
 
@@ -575,21 +461,14 @@ def analyze_transaction_with_ai(transaction_data, suspicious_transactions, colum
575
  3. Recommended next steps for investigation
576
  """
577
 
578
- # Create an OpenAI client with the API key
579
- client = openai.OpenAI(api_key=openai.api_key)
580
-
581
- # Call OpenAI API
582
- response = client.chat.completions.create(
583
- model="gpt-3.5-turbo",
584
- messages=[
585
- {"role": "system", "content": "You are a fraud detection expert helping analyze suspicious financial transactions."},
586
- {"role": "user", "content": prompt}
587
- ],
588
- max_tokens=800
589
- )
590
 
591
  # Return the AI analysis
592
- return response.choices[0].message.content
593
 
594
  except Exception as e:
595
  import traceback
 
8
  import plotly.graph_objects as go
9
  from sklearn.ensemble import IsolationForest
10
  from sklearn.preprocessing import StandardScaler
11
+ import google.generativeai as genai
12
  from datetime import datetime, timedelta
13
  import json
14
  import tempfile
15
 
16
+ # Set Gemini API key from Hugging Face Spaces secrets
17
+ genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
18
 
19
  def analyze_dataset_structure(df):
20
+ """Use Gemini to analyze the dataset structure and identify relevant columns"""
21
+ api_key = os.environ.get("GEMINI_API_KEY")
22
+ if not api_key:
23
+ return None, "Gemini API key not found. Please add it to the Hugging Face Spaces secrets."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  try:
26
  # Get basic dataset info
 
61
  "sample_values": sample_values_str
62
  })
63
 
64
+ # Create prompt for Gemini
65
  prompt = f"""
66
  Analyze this transaction dataset structure to identify the purpose of each column.
67
 
 
101
  Include only columns that you're reasonably confident about, and use null for any category where you can't identify a matching column.
102
  """
103
 
104
+ # Create Gemini model
105
+ model = genai.GenerativeModel('gemini-pro')
106
+
107
+ # Call Gemini API
108
+ response = model.generate_content(prompt)
 
 
 
 
 
 
 
 
109
 
110
  # Parse the JSON response
111
+ response_text = response.text
112
+ # Extract JSON from response if it's wrapped in markdown code blocks
113
+ if "```json" in response_text:
114
+ json_start = response_text.find("```json") + 7
115
+ json_end = response_text.find("```", json_start)
116
+ response_text = response_text[json_start:json_end].strip()
117
+ elif "```" in response_text:
118
+ json_start = response_text.find("```") + 3
119
+ json_end = response_text.find("```", json_start)
120
+ response_text = response_text[json_start:json_end].strip()
121
+
122
+ structure_analysis = json.loads(response_text)
123
 
124
  # Also get a natural language explanation
125
  explanation_prompt = f"""
 
131
  Keep your explanation concise and focused on the unique characteristics of this dataset.
132
  """
133
 
134
+ explanation_response = model.generate_content(explanation_prompt)
135
+ explanation = explanation_response.text
 
 
 
 
 
 
 
 
 
 
136
 
137
  return structure_analysis, explanation
138
 
 
404
  return visualizations
405
 
406
  def analyze_transaction_with_ai(transaction_data, suspicious_transactions, column_mapping):
407
+ """Use Gemini to analyze suspicious transactions and provide insights"""
408
+ api_key = os.environ.get("GEMINI_API_KEY")
409
+ if not api_key:
410
+ return "Gemini API key not found. Please add it to the Hugging Face Spaces secrets."
411
 
412
  try:
413
+ # Prepare information for Gemini, converting to a JSON-serializable format
414
  suspicious_sample = suspicious_transactions.head(5).copy()
415
 
416
  # Convert any datetime columns to string format to make it JSON serializable
 
442
  "suspicious_avg_amount": float(round(suspicious_transactions[amount_col].mean(), 2))
443
  })
444
 
445
+ # Create prompt for Gemini
446
  prompt = f"""
447
  Analyze these potentially fraudulent transactions and identify patterns or anomalies:
448
 
 
461
  3. Recommended next steps for investigation
462
  """
463
 
464
+ # Create Gemini model
465
+ model = genai.GenerativeModel('gemini-pro')
466
+
467
+ # Call Gemini API
468
+ response = model.generate_content(prompt)
 
 
 
 
 
 
 
469
 
470
  # Return the AI analysis
471
+ return response.text
472
 
473
  except Exception as e:
474
  import traceback