Spaces:
Build error
Build error
File size: 7,374 Bytes
63590dc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 | import pandas as pd
import numpy as np
from fpdf import FPDF
from datetime import datetime
class ReportGenerator:
    """Generate CSV and PDF reports for fraud detection analysis.

    Every ``history`` argument is a sequence of per-transaction dicts. The
    keys this class reads are ``trans_num``, ``Amount``, ``merchant``,
    ``category``, ``Final_Score``, ``Prediction`` and ``is_fraud``; all of
    them are optional on any individual record.
    """

    # Columns exported to CSV, in output order. Columns that are absent from
    # the data are skipped rather than raising.
    _CSV_COLUMNS = ('trans_num', 'Amount', 'merchant', 'category',
                    'Final_Score', 'Prediction', 'is_fraud')

    def __init__(self):
        # Captured once at construction: the default CSV and PDF filenames
        # produced by one instance share this stamp, and repeated calls
        # without an explicit filename reuse the same default name.
        self.report_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    def generate_csv_report(self, history, filename=None):
        """Write the transaction history to a CSV file.

        Args:
            history: Sequence of transaction dicts.
            filename: Output path. Defaults to
                ``fraud_report_<report_timestamp>.csv``.

        Returns:
            The filename written, or ``None`` when ``history`` is empty.
        """
        if not history:
            return None

        if filename is None:
            filename = f"fraud_report_{self.report_timestamp}.csv"

        df = pd.DataFrame(history)

        # Keep only the report columns that actually exist in this history.
        available_cols = [col for col in self._CSV_COLUMNS if col in df.columns]
        df[available_cols].to_csv(filename, index=False)
        return filename

    def generate_pdf_report(self, history, filename=None):
        """Write a PDF report with summary statistics.

        The report contains an executive summary, the confusion-matrix
        breakdown, the ten highest-scoring transactions and a static
        description of the model architecture.

        Args:
            history: Sequence of transaction dicts.
            filename: Output path. Defaults to
                ``fraud_report_<report_timestamp>.pdf``.

        Returns:
            The filename written, or ``None`` when ``history`` is empty.
        """
        if not history:
            return None

        if filename is None:
            filename = f"fraud_report_{self.report_timestamp}.pdf"

        metrics = self._calculate_metrics(history)

        pdf = FPDF()
        pdf.add_page()

        # Title block
        pdf.set_font('Arial', 'B', 20)
        pdf.cell(0, 10, 'Fraud Detection Report', 0, 1, 'C')
        pdf.set_font('Arial', '', 10)
        pdf.cell(0, 10, f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 0, 1, 'C')
        pdf.ln(10)

        self._write_section(
            pdf, 'Executive Summary',
            [
                f"Total Transactions Processed: {metrics['total_transactions']}",
                f"Transactions Flagged as Fraud: {metrics['flagged_fraud']}",
                f"Actual Fraud Cases: {metrics['actual_fraud']}",
                f"Overall Fraud Rate: {metrics['fraud_rate']:.2f}%",
                "",
                f"System Accuracy: {metrics['accuracy']:.2f}%",
                f"Precision: {metrics['precision']:.2f}%",
                f"Recall (Fraud Detection Rate): {metrics['recall']:.2f}%",
                f"F1 Score: {metrics['f1_score']:.3f}",
            ],
            font_size=11, line_height=7,
        )

        self._write_section(
            pdf, 'Performance Breakdown',
            [
                f"True Positives (Correctly Identified Fraud): {metrics['tp']}",
                f"True Negatives (Correctly Identified Safe): {metrics['tn']}",
                f"False Positives (Safe Flagged as Fraud): {metrics['fp']}",
                f"False Negatives (Missed Fraud): {metrics['fn']}",
            ],
            font_size=11, line_height=7,
        )

        self._write_section(
            pdf, 'Top 10 Highest Risk Transactions',
            self._top_risk_lines(history, limit=10),
            font_size=9, line_height=5,
        )

        # Last section on the page: no trailing gap after it.
        self._write_section(
            pdf, 'Hybrid Model Architecture',
            [
                "This system uses a hybrid quantum-classical approach:",
                "",
                "Classical Model (XGBoost) - 80% weight:",
                " - Processes 10 engineered features",
                " - High throughput, low latency",
                " - Handles majority of fraud detection",
                "",
                "Quantum Model (VQC) - 20% weight:",
                " - Focuses on 4 critical features",
                " - Specialized anomaly detection",
                " - Captures subtle non-linear patterns",
                "",
                "Final Decision = 0.8 * Classical + 0.2 * Quantum",
            ],
            font_size=10, line_height=5, gap_after=0,
        )

        pdf.output(filename)
        return filename

    @staticmethod
    def _write_section(pdf, title, lines, font_size, line_height, gap_after=5):
        """Emit a bold 14pt heading followed by one left-aligned cell per line."""
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, title, 0, 1, 'L')
        pdf.set_font('Arial', '', font_size)
        for line in lines:
            pdf.cell(0, line_height, line, 0, 1, 'L')
        if gap_after:
            pdf.ln(gap_after)

    @staticmethod
    def _to_float(value, default=0.0):
        """Coerce ``value`` to float, falling back to ``default`` if it is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @classmethod
    def _top_risk_lines(cls, history, limit=10):
        """Return display lines for the ``limit`` transactions with the highest Final_Score."""
        # Scores and amounts are coerced so that a record holding None or a
        # non-numeric value sorts/prints as 0 instead of aborting the report.
        ranked = sorted(
            history,
            key=lambda txn: cls._to_float(txn.get('Final_Score', 0)),
            reverse=True,
        )[:limit]

        lines = []
        for i, txn in enumerate(ranked, 1):
            amount = cls._to_float(txn.get('Amount', 0))
            score = cls._to_float(txn.get('Final_Score', 0))
            actual = 'Fraud' if txn.get('is_fraud', 0) == 1 else 'Safe'
            line = f"{i}. Amount: ${amount:.2f} | Score: {score:.3f} | "
            line += f"Prediction: {txn.get('Prediction', 'N/A')} | Actual: {actual}"
            lines.append(line)
        return lines

    def _calculate_metrics(self, history):
        """Calculate performance metrics from history.

        A transaction counts as actual fraud only when ``is_fraud == 1`` and
        as predicted fraud only when ``Prediction == 'Fraud'``; every other
        value (missing key, ``None``, NaN, ...) counts as the negative class.

        Args:
            history: Sequence of transaction dicts.

        Returns:
            Dict with ``total_transactions``, ``flagged_fraud``,
            ``actual_fraud``, the confusion-matrix counts ``tp``/``fp``/
            ``fn``/``tn``, ``fraud_rate``/``accuracy``/``precision``/
            ``recall`` as percentages (0-100), and ``f1_score`` as a
            fraction (0-1). Every ratio is 0 when its denominator is 0.
        """
        # Normalise labels to strict 0/1 with the same rule the PDF listing
        # uses ("== 1" means fraud). This keeps a None label from breaking the
        # totals and guarantees tp + fp + fn + tn == total.
        true_labels = [1 if t.get('is_fraud', 0) == 1 else 0 for t in history]
        predictions = [1 if t.get('Prediction') == 'Fraud' else 0 for t in history]

        total = len(true_labels)

        # Confusion matrix in a single pass.
        tp = fp = fn = tn = 0
        for actual, predicted in zip(true_labels, predictions):
            if actual and predicted:
                tp += 1
            elif predicted:
                fp += 1
            elif actual:
                fn += 1
            else:
                tn += 1

        flagged = tp + fp
        actual_fraud = tp + fn

        accuracy = (tp + tn) / total if total > 0 else 0
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        fraud_rate = (actual_fraud / total * 100) if total > 0 else 0

        return {
            'total_transactions': total,
            'flagged_fraud': flagged,
            'actual_fraud': actual_fraud,
            'fraud_rate': fraud_rate,
            'accuracy': accuracy * 100,
            'precision': precision * 100,
            'recall': recall * 100,
            'f1_score': f1,
            'tp': tp,
            'fp': fp,
            'fn': fn,
            'tn': tn,
        }
if __name__ == "__main__":
    # Smoke test: one transaction flagged as fraud and one safe transaction,
    # pushed through both report writers with their default filenames.
    flagged_txn = dict(
        trans_num='001', Amount=150.0, merchant='Test Store',
        category='retail', Final_Score=0.75, Prediction='Fraud', is_fraud=1,
    )
    safe_txn = dict(
        trans_num='002', Amount=25.0, merchant='Coffee Shop',
        category='food', Final_Score=0.15, Prediction='Safe', is_fraud=0,
    )
    sample_history = [flagged_txn, safe_txn]

    generator = ReportGenerator()
    csv_file = generator.generate_csv_report(sample_history)
    pdf_file = generator.generate_pdf_report(sample_history)
    print(f"Test reports generated: {csv_file}, {pdf_file}")