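# NOTE(review): the first lines of this file originally read
# "Spaces: / Build error / Build error". They look like hosting-page residue
# rather than code - confirm and remove.
"""
Enhanced AI-Powered Fraud Detection Chatbot
Clean version with improved context awareness and error handling
"""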
import json
import math
import os
import re
import time

import requests
class AIFraudChatbot:
    """Chatbot for the hybrid quantum-classical fraud detection system.

    Questions are answered by the OpenRouter chat-completions API when a
    usable API key is configured. A set of canned ("built-in") answers is
    used as a fallback whenever the API cannot be used: locally rate
    limited, no/invalid key, HTTP error, or network failure.

    NOTE(review): several string literals below start or end with glyphs
    that look like mis-decoded emoji/bullets. They are kept exactly as found;
    confirm the file encoding before changing them.
    """

    def __init__(self, api_key=None):
        """Set up credentials, endpoint, rate limiting and canned answers.

        Args:
            api_key: OpenRouter API key. When falsy, the
                ``OPENROUTER_API_KEY`` environment variable is used instead.
                Surrounding whitespace and quote characters are stripped.
        """
        raw_key = api_key if api_key else os.environ.get("OPENROUTER_API_KEY", "")
        self.api_key = self._clean_api_key(raw_key)
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"
        self.model = "meta-llama/llama-3.2-3b-instruct:free"
        # Track rate limiting
        self.last_request_time = 0
        self.min_request_interval = 5  # seconds between API calls
        # Built-in responses for common questions (fallback when API unavailable)
        self.builtin_responses = {
            "fraud_detection": """**How Our Fraud Detection Works** π
Our system uses a **Hybrid Quantum-Classical approach**:
**1. Classical Component (80% weight)**
- XGBoost Gradient Boosting model
- Analyzes 10 key features: transaction amount, time, location patterns, etc.
- Fast inference (~1ms per transaction)
**2. Quantum Ensemble (20% weight)**
- **VQC (40%)**: Variational Quantum Classifier for pattern recognition
- **QAOA (30%)**: Quantum optimization for feature selection
- **QNN (30%)**: Quantum Neural Network for non-linear patterns
**Why Hybrid?** The quantum component excels at detecting subtle patterns that classical models might miss, while the classical model provides reliable baseline accuracy.""",
            "quantum_advantage": """**Quantum vs Classical: The Advantage** βοΈ
**Classical Models (XGBoost)**:
β High accuracy on structured data
β Fast training and inference
β Well-understood behavior
β May miss subtle correlations
**Quantum Components**:
β Detects complex entangled patterns
β Superior in high-dimensional feature spaces
β Finds correlations invisible to classical ML
β Currently limited by qubit count
**Our Hybrid Approach**:
- Uses classical model as the primary detector (80%)
- Quantum ensemble refines predictions (20%)
- Best of both worlds: reliability + pattern detection
**Real Impact**: The quantum component improves edge-case detection by catching fraud patterns that classical models alone would miss.""",
            "precision_recall": """**Understanding Precision & Recall** π
**Precision** = True Positives / (True Positives + False Positives)
- "When we flag fraud, how often are we right?"
- High precision = fewer false alarms
**Recall** = True Positives / (True Positives + False Negatives)
- "Of all actual fraud, how much do we catch?"
- High recall = fewer missed fraud cases
**The Trade-off**:
- πΌ Increase threshold β Higher precision, lower recall
- π½ Decrease threshold β Higher recall, lower precision
**Business Impact**:
- **Low Precision**: Customer frustration from blocked legitimate transactions
- **Low Recall**: Financial losses from undetected fraud
**Recommendation**: Balance based on your risk tolerance. Most businesses target ~85% recall with >70% precision.""",
            "threshold": """**Optimizing the Fraud Threshold** π―
The threshold (default: 0.5) determines when a transaction is flagged as fraud.
**When to Lower Threshold (e.g., 0.4)**:
- Recall is too low (missing fraud)
- High-value transactions at risk
- Can handle more manual reviews
**When to Raise Threshold (e.g., 0.6)**:
- Too many false positives
- Customer complaints about blocks
- Need higher precision
**Testing Approach**:
1. Start with historical data
2. Test thresholds: 0.4, 0.45, 0.5, 0.55, 0.6
3. Calculate precision/recall for each
4. Choose based on your cost model:
- Cost of fraud vs cost of false positive
**Current System**: Using 0.5 threshold with 80/20 classical/quantum weighting.""",
            "vqc_qaoa_qnn": """**Quantum Model Components Explained** βοΈ
**VQC - Variational Quantum Classifier (40% weight)**
- Uses parameterized quantum circuits
- Learns optimal qubit rotations during training
- Best for: Binary classification tasks
- 4 qubits, multiple rotation layers
**QAOA - Quantum Approximate Optimization (30% weight)**
- Solves optimization problems
- Finds optimal feature combinations
- Best for: Feature selection, pattern optimization
- 2 layers of mixing and cost operators
**QNN - Quantum Neural Network (30% weight)**
- Deep quantum circuits with entanglement
- Captures non-linear relationships
- Best for: Complex pattern recognition
- 3-layer architecture with strong entanglement
**Why These Three?**
Each captures different aspects of fraud patterns:
- VQC: Direct classification
- QAOA: Optimal feature weighting
- QNN: Hidden correlations""",
            "improve_accuracy": """**Improving Model Accuracy** π
**1. Data Quality**
- Ensure balanced dataset (fraud vs non-fraud)
- Handle missing values properly
- Feature normalization is applied
**2. Threshold Tuning**
- Current: 0.5 (default)
- Adjust based on precision/recall needs
- Use ROC curve analysis
**3. Feature Engineering**
- Transaction velocity (txns per hour)
- Geographic anomalies
- Merchant category patterns
- Time-based features (hour, day of week)
**4. Model Ensemble**
- Classical handles bulk cases well
- Quantum catches edge cases
- Current 80/20 split is optimized
**5. Regular Retraining**
- Fraud patterns evolve
- Retrain monthly with new data
- Monitor drift metrics
**Quick Wins**:
- Check for data imbalance
- Verify feature scaling
- Test different threshold values""",
            "false_positives": """**Reducing False Positives** π«
False positives occur when legitimate transactions are flagged as fraud.
**Common Causes**:
1. Threshold too low
2. Unusual but legitimate behavior
3. New customer patterns
4. Geographic false flags
**Solutions**:
**1. Raise Threshold**
- Current: 0.5 β Try 0.55 or 0.6
- Trade-off: May miss some fraud
**2. Customer Profiling**
- Build normal behavior baselines
- Flag only significant deviations
**3. Velocity Checks**
- Multiple small txns vs one large
- Time-based patterns
**4. Merchant Categories**
- Trust established merchants
- Scrutinize high-risk categories
**5. Two-Stage Review**
- Score 0.5-0.7: Soft flag (monitor)
- Score >0.7: Hard flag (block)
**Business Impact**: Each false positive costs customer trust. Balance carefully.""",
            "how_it_works": """**System Architecture Overview** ποΈ
**Data Flow**:
1. Transaction arrives β Feature extraction
2. Features scaled using StandardScaler
3. Parallel processing:
- Classical model (XGBoost) β 80% weight
- Quantum ensemble (VQC+QAOA+QNN) β 20% weight
4. Scores combined β Final prediction
5. Threshold comparison β Fraud/Safe label
**Tech Stack**:
- **Backend**: FastAPI (Python)
- **ML**: XGBoost, PennyLane (Quantum)
- **Frontend**: Next.js, React
- **Database**: 1.2M+ transactions
**Real-time Processing**:
- Inference time: ~50ms per transaction
- Quantum simulation on CPU (production would use QPU)
- Handles batch and streaming modes
**Models Location**: `/models/` directory
- `classical_model.joblib`: XGBoost
- `vqc_weights.npy`: VQC parameters
- `qaoa_weights.npy`: QAOA parameters
- `qnn_weights.npy`: QNN parameters"""
        }
        # System prompt with comprehensive instructions
        self.system_prompt = """You are an expert AI assistant for a Hybrid Quantum-Classical Fraud Detection System.
RESPONSE GUIDELINES:
- Be professional, concise, and actionable
- Use specific data from the current system when provided
- Explain technical concepts clearly without excessive jargon
- Focus on practical recommendations and insights
- Keep responses under 300 words
- Never fabricate statistics or provide broken formulas
YOUR EXPERTISE:
- Fraud detection performance analysis
- Quantum-classical hybrid architecture explanation
- Threshold optimization and tuning
- Model performance troubleshooting
- Pattern recognition in financial transactions
SYSTEM ARCHITECTURE:
- Hybrid Model: 80% Classical XGBoost + 20% Quantum Ensemble
- Quantum Components: VQC (40%) + QAOA (30%) + QNN (30%)
- Features: 10 classical features, 4 quantum-optimized features
- Real-time processing with configurable thresholds
KEY METRICS INTERPRETATION:
- Precision = TP/(TP+FP) - Accuracy of fraud predictions
- Recall = TP/(TP+FN) - Coverage of actual fraud cases
- High Accuracy + Low Precision = Too many false positives
- Low Recall = Missing real fraud cases
COMMON ISSUES & SOLUTIONS:
- 0% Precision: All flagged transactions are false positives β Increase threshold
- 0% Recall: Missing actual fraud β Decrease threshold or improve features
- Imbalanced data: Use weighted metrics and proper sampling
Always provide specific, actionable recommendations based on the current system state."""

    @staticmethod
    def _clean_api_key(raw_key):
        """Strip whitespace and stray quote characters from an API key."""
        return raw_key.strip().strip('"').strip("'")

    @staticmethod
    def _with_fallback(builtin_response, note, default_message):
        """Return the canned answer plus ``note``, or ``default_message`` if none matched."""
        if builtin_response:
            return builtin_response + note
        return default_message

    @staticmethod
    def _score_column(history, key):
        """Collect ``key`` from every record, treating missing/None values as 0."""
        return [record.get(key) or 0 for record in history]

    def _get_system_context(self, history):
        """Summarise the transaction history as text for the model prompt.

        Args:
            history: list of dict records. Keys read here: ``is_fraud``
                (compared against 1/0), ``Prediction`` (``'Fraud'`` counts as
                a positive), ``Final_Score``, ``Quantum_Score`` and
                ``Classical_Score``.

        Returns:
            A fixed status line when ``history`` is empty or None, otherwise
            a multi-line report with counts, accuracy/precision/recall/F1,
            average scores and threshold hints.
        """
        if not history:
            return "System Status: Initialized and ready. No transactions processed yet."
        # Calculate comprehensive metrics
        total = len(history)
        true_labels = [t.get('is_fraud', 0) for t in history]
        predictions = [1 if t.get('Prediction') == 'Fraud' else 0 for t in history]
        # Confusion matrix
        pairs = list(zip(true_labels, predictions))
        tp = sum(1 for t, p in pairs if t == 1 and p == 1)
        fp = sum(1 for t, p in pairs if t == 0 and p == 1)
        fn = sum(1 for t, p in pairs if t == 1 and p == 0)
        tn = sum(1 for t, p in pairs if t == 0 and p == 0)
        # Performance metrics (each guarded against a zero denominator)
        accuracy = (tp + tn) / total
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        # Score analysis; history is non-empty here so the lists are too
        final_scores = self._score_column(history, 'Final_Score')
        quantum_scores = self._score_column(history, 'Quantum_Score')
        classical_scores = self._score_column(history, 'Classical_Score')
        avg_final = sum(final_scores) / total
        avg_quantum = sum(quantum_scores) / total
        avg_classical = sum(classical_scores) / total
        # Fraud analysis
        flagged_count = sum(predictions)
        actual_fraud_count = sum(true_labels)
        # Generate context summary
        context = f"""
CURRENT SYSTEM ANALYSIS:
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
π TRANSACTION OVERVIEW:
β’ Total Processed: {total:,} transactions
β’ Flagged as Fraud: {flagged_count} ({flagged_count/total*100:.1f}% rate)
β’ Actual Fraud Cases: {actual_fraud_count} ({actual_fraud_count/total*100:.1f}% rate)
β‘ PERFORMANCE METRICS:
β’ Accuracy: {accuracy*100:.1f}% (Overall correctness)
β’ Precision: {precision*100:.1f}% (Fraud prediction accuracy)
β’ Recall: {recall*100:.1f}% (Fraud detection coverage)
β’ F1-Score: {f1:.3f} (Balanced performance measure)
π DETECTION BREAKDOWN:
β’ True Positives (Correct Fraud): {tp}
β’ False Positives (Safe β Fraud): {fp} {"β οΈ HIGH!" if fp > tp * 2 else ""}
β’ False Negatives (Fraud β Safe): {fn} {"π¨ CRITICAL!" if fn > 0 else ""}
β’ True Negatives (Correct Safe): {tn}
βοΈ HYBRID MODEL SCORES:
β’ Average Final Score: {avg_final:.4f}
β’ Classical Component: {avg_classical:.4f} (80% weight)
β’ Quantum Ensemble: {avg_quantum:.4f} (20% weight)
π― THRESHOLD ANALYSIS:
β’ Current Threshold: 0.5 (configurable)
β’ Scores Range: {min(final_scores):.3f} - {max(final_scores):.3f}
{'β’ RECOMMENDATION: Consider adjusting threshold based on precision/recall balance' if precision < 0.5 or recall < 0.5 else ''}
π¬ QUANTUM COMPONENT STATUS:
β’ VQC (Pattern Recognition): Active
β’ QAOA (Optimization): Active
β’ QNN (Neural Processing): Active
"""
        # Extra guidance only when both precision and recall are weak
        if precision < 0.7 and recall < 0.7:
            context += """
π― THRESHOLD RECOMMENDATION:
Current threshold may be suboptimal. Consider:
β’ Lower threshold (0.45) to improve recall
β’ Higher threshold (0.65) to improve precision
β’ Current performance suggests class imbalance
"""
        return context

    def get_response(self, user_message, history=None):
        """Answer a user question, preferring the AI service over canned text.

        The canned answer is looked up first but only used as a fallback:
        when calls arrive faster than ``min_request_interval``, when the API
        key is missing or does not start with ``sk-or-v1-``, when the service
        returns a non-200 status, or when the request raises.

        Args:
            user_message: the user's question.
            history: optional list of transaction dicts; summarised via
                ``_get_system_context`` and sent along with the question.

        Returns:
            The reply text. This method does not raise; every failure path
            returns a user-facing message instead.
        """
        builtin_response = self._get_builtin_response(user_message)
        # Local rate limiting
        current_time = time.time()
        elapsed = current_time - self.last_request_time
        if elapsed < self.min_request_interval:
            # Round up so the message never says "wait 0 seconds".
            wait_time = max(1, math.ceil(self.min_request_interval - elapsed))
            return self._with_fallback(
                builtin_response,
                "\n\nπ‘ *Response from built-in knowledge base*",
                f"β³ Please wait {wait_time} seconds between questions to avoid rate limits.",
            )
        # Validate API key
        if not self.api_key or not self.api_key.startswith("sk-or-v1-"):
            return self._with_fallback(
                builtin_response,
                "\n\nπ‘ *Response from built-in knowledge base (AI service not configured)*",
                """π€ **AI Assistant Configuration Required**
To enable intelligent fraud analysis:
1. **Get Free API Key**: Visit [OpenRouter.ai](https://openrouter.ai)
2. **Configure**: Create a `.env` file with:
```
OPENROUTER_API_KEY=sk-or-v1-your-key-here
```
3. **Restart**: The backend server
**Current Status**: Core fraud detection system is fully operational. Built-in responses available for common questions.""",
            )
        # Build context from system state
        system_context = self._get_system_context(history) if history else "System ready for analysis."
        # Construct conversation
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": f"""
CURRENT SYSTEM DATA:
{system_context}
USER QUESTION: {user_message}
Please provide a clear, specific response using the current system metrics. Focus on actionable insights and practical recommendations.
"""}
        ]
        try:
            # Stamp before sending so failed attempts also count toward the limit.
            self.last_request_time = current_time
            response = requests.post(
                self.base_url,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                    "HTTP-Referer": "https://quantum-fraud-detector.local",
                    "X-Title": "Quantum Fraud Detection System"
                },
                json={
                    "model": self.model,
                    "messages": messages,
                    "max_tokens": 400,
                    "temperature": 0.3,
                    "top_p": 0.9,
                    "frequency_penalty": 0.2,
                    "presence_penalty": 0.1
                },
                timeout=30
            )
            status = response.status_code
            if status == 200:
                result = response.json()
                ai_response = result['choices'][0]['message']['content'].strip()
                # Drop code fences from the model output.
                ai_response = ai_response.replace('```', '')
                question = user_message.lower()
                if any(keyword in question for keyword in ['quantum', 'vqc', 'qaoa', 'qnn']):
                    ai_response += "\n\nπ‘ **Quick Reference**: VQC = Variational Quantum Classifier, QAOA = Quantum Optimization, QNN = Quantum Neural Network"
                if any(keyword in question for keyword in ['precision', 'recall', 'f1']):
                    ai_response += "\n\nπ **Metric Tip**: Balance precision (fraud accuracy) vs recall (fraud coverage) based on business priorities."
                return ai_response
            # Handle API errors - use builtin fallback
            if status == 429:
                return self._with_fallback(
                    builtin_response,
                    "\n\nπ‘ *Response from built-in knowledge base (AI rate limited)*",
                    """β° **Rate Limit Exceeded**
Too many requests to the AI service. Try asking about:
β’ How fraud detection works
β’ Quantum vs classical advantage
β’ Precision and recall
β’ Threshold optimization
These topics have built-in responses available!""",
                )
            if status in (401, 402):
                return self._with_fallback(
                    builtin_response,
                    "\n\nπ‘ *Response from built-in knowledge base*",
                    """π **AI Service Issue**
API authentication failed. Built-in responses available for common questions about:
β’ Fraud detection methodology
β’ Quantum model components
β’ Performance optimization""",
                )
            return self._with_fallback(
                builtin_response,
                "\n\nπ‘ *Response from built-in knowledge base*",
                f"β οΈ AI service returned status {status}. Try asking about fraud detection basics.",
            )
        except requests.exceptions.RequestException:
            # Timeout and ConnectionError are subclasses of RequestException.
            return self._with_fallback(
                builtin_response,
                "\n\nπ‘ *Response from built-in knowledge base (network issue)*",
                """π **Connection Issue**
Unable to reach AI service. The fraud detection system is working normally.
**Available Topics** (no network needed):
β’ "How does fraud detection work?"
β’ "Explain quantum advantage"
β’ "What is precision vs recall?"
β’ "How to reduce false positives?" """,
            )
        except Exception:
            # Deliberate best-effort boundary: a malformed API payload (bad
            # JSON, missing keys, null content) must not break the caller.
            return builtin_response or "π οΈ An error occurred. Core fraud detection remains operational."

    def _get_builtin_response(self, user_message):
        """Return the canned answer matching ``user_message``, or None.

        Patterns are tried in order and the first hit wins, so they are
        listed from most specific to most generic. The catch-all
        "how/does/what ... fraud/work" pattern sits near the end; checked
        earlier it would shadow specific topics such as "What's the optimal
        fraud threshold?".

        Args:
            user_message: the user's question; empty or None yields None.
        """
        if not user_message:
            return None
        message_lower = user_message.lower()
        # (pattern, builtin_responses key), specific topics first
        patterns = (
            (r'(vqc|qaoa|qnn|variational|quantum.*(model|circuit|neural))', 'vqc_qaoa_qnn'),
            (r'(precision|recall|f1|metric|score|accuracy)', 'precision_recall'),
            (r'(threshold|cutoff|adjust|tune|optimal)', 'threshold'),
            (r'(improve|increase|better|accuracy|performance|optimize)', 'improve_accuracy'),
            (r'(false.?positive|wrong|mistake|legitimate|block)', 'false_positives'),
            (r'(quantum|classical|advantage|better|compare|difference|useful)', 'quantum_advantage'),
            (r'(how|does|what).*(fraud|detection|detect|work|identify)', 'fraud_detection'),
            (r'(architecture|system|how.*work|overview|explain.*system)', 'how_it_works'),
        )
        for pattern, key in patterns:
            if re.search(pattern, message_lower):
                return self.builtin_responses.get(key)
        return None
| # --- Test Function --- | |
def test_chatbot():
    """Manual smoke test: ask the chatbot a few questions and print the replies.

    Needs ``OPENROUTER_API_KEY`` in the environment; prints a hint and
    returns when it is missing. Uses a synthetic 100-record history so the
    replies have metrics to refer to.
    """
    print("π§ͺ Testing Enhanced AI Fraud Detection Assistant")
    print("=" * 60)
    api_key = os.getenv("OPENROUTER_API_KEY", "")
    if not api_key:
        print("β No API key found. Set OPENROUTER_API_KEY environment variable.")
        return
    chatbot = AIFraudChatbot(api_key=api_key)
    # Simulate realistic transaction history
    test_history = [
        {
            'Prediction': 'Fraud' if i % 20 == 0 else 'Safe',  # 5% flagged
            'is_fraud': 1 if i % 25 == 0 else 0,  # 4% actual fraud
            'Final_Score': 0.6 + (i % 5) * 0.1,  # Varied scores
            'Quantum_Score': 0.3 + (i % 3) * 0.1,
            'Classical_Score': 0.7 + (i % 4) * 0.05,
        }
        for i in range(100)
    ]
    test_questions = [
        "Why is my precision so low?",
        "How can I improve recall performance?",
        "Explain how the quantum ensemble works",
        "What's the optimal fraud threshold?",
        "How do false positives impact my business?"
    ]
    for i, question in enumerate(test_questions, 1):
        print(f"\nπΈ Test {i}: {question}")
        print("-" * 50)
        response = chatbot.get_response(question, test_history)
        print(response[:200] + "..." if len(response) > 200 else response)
        # Wait out the chatbot's own rate limit. A shorter pause means every
        # question after the first is answered locally and never hits the API.
        if i < len(test_questions):
            time.sleep(chatbot.min_request_interval)
# Run the manual smoke test only when this file is executed as a script.
if __name__ == "__main__":
    test_chatbot()