File size: 7,374 Bytes
63590dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import pandas as pd
import numpy as np
from fpdf import FPDF
from datetime import datetime

class ReportGenerator:
    """Generate CSV and PDF reports for fraud detection analysis."""

    def __init__(self):
        # Captured once at construction so every report produced by this
        # instance shares a consistent, sortable filename suffix.
        self.report_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    def generate_csv_report(self, history, filename=None):
        """Write the transaction history to a CSV file.

        Parameters
        ----------
        history : list[dict]
            Per-transaction records. Only the known report columns that are
            actually present in the records are exported.
        filename : str, optional
            Output path; defaults to a timestamped name in the CWD.

        Returns
        -------
        str or None
            The filename written, or None when ``history`` is empty/None.
        """
        if not history:  # truthiness covers both None and an empty list
            return None

        if filename is None:
            filename = f"fraud_report_{self.report_timestamp}.csv"

        df = pd.DataFrame(history)

        # Fixed column order for the report; silently skip columns the
        # history records do not carry.
        report_cols = ['trans_num', 'Amount', 'merchant', 'category',
                       'Final_Score', 'Prediction', 'is_fraud']
        available_cols = [col for col in report_cols if col in df.columns]
        df[available_cols].to_csv(filename, index=False)

        return filename

    def generate_pdf_report(self, history, filename=None):
        """Render a summary PDF (metrics, top risks, model notes).

        Parameters
        ----------
        history : list[dict]
            Per-transaction records with 'Final_Score', 'Prediction',
            'Amount' and 'is_fraud' keys (missing keys default sensibly).
        filename : str, optional
            Output path; defaults to a timestamped name in the CWD.

        Returns
        -------
        str or None
            The filename written, or None when ``history`` is empty/None.
        """
        if not history:
            return None

        if filename is None:
            filename = f"fraud_report_{self.report_timestamp}.pdf"

        metrics = self._calculate_metrics(history)

        pdf = FPDF()
        pdf.add_page()

        # Title block
        pdf.set_font('Arial', 'B', 20)
        pdf.cell(0, 10, 'Fraud Detection Report', 0, 1, 'C')
        pdf.set_font('Arial', '', 10)
        pdf.cell(0, 10, f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 0, 1, 'C')
        pdf.ln(10)

        summary_lines = [
            f"Total Transactions Processed: {metrics['total_transactions']}",
            f"Transactions Flagged as Fraud: {metrics['flagged_fraud']}",
            f"Actual Fraud Cases: {metrics['actual_fraud']}",
            f"Overall Fraud Rate: {metrics['fraud_rate']:.2f}%",
            "",
            f"System Accuracy: {metrics['accuracy']:.2f}%",
            f"Precision: {metrics['precision']:.2f}%",
            f"Recall (Fraud Detection Rate): {metrics['recall']:.2f}%",
            f"F1 Score: {metrics['f1_score']:.3f}"
        ]
        self._pdf_section(pdf, 'Executive Summary', summary_lines,
                          font_size=11, line_height=7)
        pdf.ln(5)

        performance_lines = [
            f"True Positives (Correctly Identified Fraud): {metrics['tp']}",
            f"True Negatives (Correctly Identified Safe): {metrics['tn']}",
            f"False Positives (Safe Flagged as Fraud): {metrics['fp']}",
            f"False Negatives (Missed Fraud): {metrics['fn']}"
        ]
        self._pdf_section(pdf, 'Performance Breakdown', performance_lines,
                          font_size=11, line_height=7)
        pdf.ln(5)

        self._pdf_section(pdf, 'Top 10 Highest Risk Transactions',
                          self._top_transaction_lines(history),
                          font_size=9, line_height=5)
        pdf.ln(5)

        arch_lines = [
            "This system uses a hybrid quantum-classical approach:",
            "",
            "Classical Model (XGBoost) - 80% weight:",
            "  - Processes 10 engineered features",
            "  - High throughput, low latency",
            "  - Handles majority of fraud detection",
            "",
            "Quantum Model (VQC) - 20% weight:",
            "  - Focuses on 4 critical features",
            "  - Specialized anomaly detection",
            "  - Captures subtle non-linear patterns",
            "",
            "Final Decision = 0.8 * Classical + 0.2 * Quantum"
        ]
        self._pdf_section(pdf, 'Hybrid Model Architecture', arch_lines,
                          font_size=10, line_height=5)

        pdf.output(filename)

        return filename

    def _pdf_section(self, pdf, title, lines, font_size=11, line_height=7):
        """Render one titled section: bold 14pt header, then one cell per line."""
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, title, 0, 1, 'L')
        pdf.set_font('Arial', '', font_size)
        for line in lines:
            pdf.cell(0, line_height, line, 0, 1, 'L')

    def _top_transaction_lines(self, history):
        """Format the 10 highest-scoring transactions as report lines."""
        top = sorted(history, key=lambda x: x.get('Final_Score', 0),
                     reverse=True)[:10]
        lines = []
        for i, txn in enumerate(top, 1):
            line = f"{i}. Amount: ${txn.get('Amount', 0):.2f} | Score: {txn.get('Final_Score', 0):.3f} | "
            line += f"Prediction: {txn.get('Prediction', 'N/A')} | Actual: {'Fraud' if txn.get('is_fraud', 0) == 1 else 'Safe'}"
            lines.append(line)
        return lines

    def _calculate_metrics(self, history):
        """Compute confusion-matrix counts and derived performance metrics.

        Ground truth is each record's 'is_fraud' flag (default 0); the
        system's call is 'Prediction' == 'Fraud'. Percent-valued metrics
        (accuracy/precision/recall/fraud_rate) are returned scaled to 0-100;
        f1_score stays on the 0-1 scale. All ratios guard against
        zero denominators by returning 0.

        Returns
        -------
        dict
            Keys: total_transactions, flagged_fraud, actual_fraud,
            fraud_rate, accuracy, precision, recall, f1_score,
            tp, fp, fn, tn.
        """
        true_labels = [t.get('is_fraud', 0) for t in history]
        predictions = [1 if t.get('Prediction') == 'Fraud' else 0 for t in history]

        total = len(true_labels)
        flagged = sum(predictions)
        actual_fraud = sum(true_labels)

        # Confusion matrix
        tp = sum(1 for t, p in zip(true_labels, predictions) if t == 1 and p == 1)
        fp = sum(1 for t, p in zip(true_labels, predictions) if t == 0 and p == 1)
        fn = sum(1 for t, p in zip(true_labels, predictions) if t == 1 and p == 0)
        tn = sum(1 for t, p in zip(true_labels, predictions) if t == 0 and p == 0)

        # Derived metrics (0 when the denominator would be 0)
        accuracy = (tp + tn) / total if total > 0 else 0
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        fraud_rate = (actual_fraud / total * 100) if total > 0 else 0

        return {
            'total_transactions': total,
            'flagged_fraud': flagged,
            'actual_fraud': actual_fraud,
            'fraud_rate': fraud_rate,
            'accuracy': accuracy * 100,
            'precision': precision * 100,
            'recall': recall * 100,
            'f1_score': f1,
            'tp': tp,
            'fp': fp,
            'fn': fn,
            'tn': tn
        }

if __name__ == "__main__":
    # Smoke test: two hand-made transaction records, one per class,
    # pushed through both report paths.
    demo_history = [
        {
            'trans_num': '001', 'Amount': 150.0, 'merchant': 'Test Store',
            'category': 'retail', 'Final_Score': 0.75,
            'Prediction': 'Fraud', 'is_fraud': 1,
        },
        {
            'trans_num': '002', 'Amount': 25.0, 'merchant': 'Coffee Shop',
            'category': 'food', 'Final_Score': 0.15,
            'Prediction': 'Safe', 'is_fraud': 0,
        },
    ]

    gen = ReportGenerator()
    csv_file = gen.generate_csv_report(demo_history)
    pdf_file = gen.generate_pdf_report(demo_history)

    print(f"Test reports generated: {csv_file}, {pdf_file}")