Spaces:
Sleeping
Sleeping
File size: 4,805 Bytes
import os
import json
import pandas as pd
from supabase import create_client
def get_supabase_client():
    """Build a Supabase client from environment credentials.

    Returns:
        A Supabase client when both SUPABASE_URL and SUPABASE_KEY are set
        in the environment; otherwise prints a warning and returns None.
    """
    url = os.environ.get("SUPABASE_URL")
    key = os.environ.get("SUPABASE_KEY")
    if url and key:
        return create_client(url, key)
    print("Warning: Missing SUPABASE_URL or SUPABASE_KEY.")
    return None
def save_upload(filename, total, flagged, high_risk, medium_risk, avg_score, date_range):
    """Persist summary statistics for one uploaded file to the `uploads` table.

    Args:
        filename: Name of the uploaded file.
        total: Total number of transactions in the upload.
        flagged: Count of flagged transactions.
        high_risk: Count of high-risk transactions.
        medium_risk: Count of medium-risk transactions.
        avg_score: Mean risk score across the upload.
        date_range: Human-readable date span covered by the upload.

    Returns:
        The new row's ``id`` on success; None when the client is unavailable,
        the insert fails, or no inserted row is returned.
    """
    supabase = get_supabase_client()
    if not supabase:
        return None
    data = {
        "filename": filename,
        "total_transactions": int(total),
        "flagged_count": int(flagged),
        "high_risk_count": int(high_risk),
        "medium_risk_count": int(medium_risk),
        "avg_risk_score": float(avg_score),
        "date_range": date_range,
    }
    try:
        response = supabase.table("uploads").insert(data).execute()
    except Exception as e:
        print(f"Error saving upload: {e}")
        return None
    # Truthiness covers both an empty list and None; the original
    # `len(response.data) > 0` would raise TypeError if data were None.
    if response.data:
        return response.data[0]['id']
    return None
def save_transactions(df, upload_id):
    """Insert scored transaction rows for an upload, 500 rows per request.

    Args:
        df: DataFrame holding one transaction per row with the columns
            listed below (including `timestamp` and `rule_flags`).
        upload_id: Id of the parent row in `uploads`.

    Insert errors are printed per-chunk and do not abort later chunks.
    """
    supabase = get_supabase_client()
    if not supabase:
        return
    prepared = df.copy()
    # Format for DB: stringify timestamps, JSON-encode the rule flag lists.
    prepared['upload_id'] = upload_id
    prepared['timestamp'] = prepared['timestamp'].astype(str)
    prepared['rule_flags'] = prepared['rule_flags'].apply(json.dumps)
    columns = [
        'upload_id', 'transaction_id', 'customer_id', 'amount', 'timestamp',
        'transaction_type', 'origin_country', 'dest_country', 'account_age_days',
        'risk_score', 'risk_level', 'ml_anomaly_flag', 'rule_flags', 'is_flagged'
    ]
    rows = prepared[columns].to_dict(orient='records')
    # Chunks of 500 keep each request reasonably small.
    for start in range(0, len(rows), 500):
        batch = rows[start:start + 500]
        try:
            supabase.table("transactions").insert(batch).execute()
        except Exception as e:
            print(f"Error saving transactions chunk: {e}")
def save_customer_profiles(profile_df, upload_id):
    """Insert per-customer aggregate rows for an upload, 500 at a time.

    Args:
        profile_df: DataFrame of customer profile aggregates.
        upload_id: Id of the parent row in `uploads`.

    Insert errors are printed per-chunk and do not abort later chunks.
    """
    supabase = get_supabase_client()
    if not supabase:
        return
    prepared = profile_df.copy()
    prepared['upload_id'] = upload_id
    # NaNs are replaced with 0 so every record serializes cleanly.
    records = prepared.fillna(0).to_dict(orient='records')
    for start in range(0, len(records), 500):
        batch = records[start:start + 500]
        try:
            supabase.table("customer_profiles").insert(batch).execute()
        except Exception as e:
            print(f"Error saving customer profiles chunk: {e}")
def save_ai_report(upload_id, report_text, model_used):
    """Store one generated AI report linked to an upload.

    Args:
        upload_id: Id of the parent row in `uploads`.
        report_text: Full text of the generated report.
        model_used: Identifier of the model that produced the report.

    Failures are printed, never raised.
    """
    supabase = get_supabase_client()
    if not supabase:
        return
    row = {
        "upload_id": upload_id,
        "report_text": report_text,
        "model_used": model_used,
    }
    try:
        supabase.table("ai_reports").insert(row).execute()
    except Exception as e:
        print(f"Error saving ai report: {e}")
def get_all_uploads():
    """Fetch every row of `uploads`, newest first.

    Returns:
        A list of upload records, or [] when the client is unavailable
        or the query fails (the error is printed).
    """
    supabase = get_supabase_client()
    if not supabase:
        return []
    try:
        query = supabase.table("uploads").select("*").order("uploaded_at", desc=True)
        return query.execute().data
    except Exception as e:
        print(f"Error fetching uploads: {e}")
        return []
def get_transactions_by_upload(upload_id):
    """Load all transactions belonging to one upload as a DataFrame.

    Args:
        upload_id: Id of the parent row in `uploads`.

    Returns:
        A DataFrame of matching transaction rows; empty when the client
        is unavailable or the query fails (the error is printed).
    """
    supabase = get_supabase_client()
    if not supabase:
        return pd.DataFrame()
    try:
        query = supabase.table("transactions").select("*").eq("upload_id", upload_id)
        return pd.DataFrame(query.execute().data)
    except Exception as e:
        print(f"Error fetching transactions: {e}")
        return pd.DataFrame()
def get_global_stats():
    """Aggregate headline metrics across every recorded upload.

    Returns:
        A dict with total transactions, total flagged, upload count, the
        most common triggered rule, and the mean per-upload risk score.
        All counters are zero / "N/A" when no uploads exist.
    """
    uploads = get_all_uploads()
    if not uploads:
        # Nothing stored yet (or DB unreachable): all-zero placeholder.
        return {
            "total_transactions_ever": 0,
            "total_flagged_ever": 0,
            "total_uploads": 0,
            "most_common_rule_triggered": "N/A",
            "avg_risk_score_global": 0.0
        }
    summary = pd.DataFrame(uploads)
    # For the most common rule we would ideally run a custom RPC or query.
    # Given typical supabase-py limits, a fixed placeholder is returned
    # instead ("fast approximation").
    return {
        "total_transactions_ever": int(summary['total_transactions'].sum()),
        "total_flagged_ever": int(summary['flagged_count'].sum()),
        "total_uploads": int(len(summary)),
        "most_common_rule_triggered": "Structuring",  # Fast approximation
        "avg_risk_score_global": float(summary['avg_risk_score'].mean())
    }