# QuantumShield / backend /enhanced_chatbot.py
# SantoshKumar1310's picture
# Upload folder using huggingface_hub
# 63590dc verified
"""
Enhanced AI-Powered Fraud Detection Chatbot
Clean version with improved context awareness and error handling
"""
import json
import math
import os
import re
import time

import requests
class AIFraudChatbot:
    """Enhanced AI-powered chatbot for fraud detection system.

    Questions are answered either from a small built-in knowledge base
    (canned answers selected by regex) or by calling the OpenRouter chat
    completions endpoint with a summary of the supplied transaction history.
    The built-in answers double as the fallback whenever the API key is not
    configured, the local rate limiter is active, or the request fails.
    """

    # Ordered (compiled pattern, builtin_responses key) pairs, compiled once.
    # The FIRST matching pattern wins, so broad patterns placed early take
    # precedence over later ones (e.g. any message containing "quantum" that
    # is not caught by the first pattern resolves to 'quantum_advantage'
    # before the 'vqc_qaoa_qnn' pattern is tried).
    _BUILTIN_PATTERNS = (
        (re.compile(r'(how|does|what).*(fraud|detection|detect|work|identify)'), 'fraud_detection'),
        (re.compile(r'(quantum|classical|advantage|better|compare|difference|useful)'), 'quantum_advantage'),
        (re.compile(r'(precision|recall|f1|metric|score|accuracy)'), 'precision_recall'),
        (re.compile(r'(threshold|cutoff|adjust|tune|optimal)'), 'threshold'),
        (re.compile(r'(vqc|qaoa|qnn|variational|quantum.*(model|circuit|neural))'), 'vqc_qaoa_qnn'),
        (re.compile(r'(improve|increase|better|accuracy|performance|optimize)'), 'improve_accuracy'),
        (re.compile(r'(false.?positive|wrong|mistake|legitimate|block)'), 'false_positives'),
        (re.compile(r'(architecture|system|how.*work|overview|explain.*system)'), 'how_it_works'),
    )

    def __init__(self, api_key=None):
        """Set up API configuration, rate-limit state and canned responses.

        Args:
            api_key: OpenRouter API key. When falsy, the key is read from the
                ``OPENROUTER_API_KEY`` environment variable instead. In both
                cases surrounding whitespace and quote characters are
                stripped.
        """
        # Clean API key (keys copied from .env files often carry quotes)
        raw_key = api_key if api_key else os.environ.get("OPENROUTER_API_KEY", "")
        self.api_key = raw_key.strip().strip('"').strip("'")
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"
        self.model = "meta-llama/llama-3.2-3b-instruct:free"
        # Track rate limiting
        self.last_request_time = 0
        self.min_request_interval = 5  # seconds between API calls
        # Built-in responses for common questions (fallback when API unavailable)
        self.builtin_responses = {
            "fraud_detection": """**How Our Fraud Detection Works** πŸ”
Our system uses a **Hybrid Quantum-Classical approach**:
**1. Classical Component (80% weight)**
- XGBoost Gradient Boosting model
- Analyzes 10 key features: transaction amount, time, location patterns, etc.
- Fast inference (~1ms per transaction)
**2. Quantum Ensemble (20% weight)**
- **VQC (40%)**: Variational Quantum Classifier for pattern recognition
- **QAOA (30%)**: Quantum optimization for feature selection
- **QNN (30%)**: Quantum Neural Network for non-linear patterns
**Why Hybrid?** The quantum component excels at detecting subtle patterns that classical models might miss, while the classical model provides reliable baseline accuracy.""",
            "quantum_advantage": """**Quantum vs Classical: The Advantage** βš›οΈ
**Classical Models (XGBoost)**:
βœ… High accuracy on structured data
βœ… Fast training and inference
βœ… Well-understood behavior
❌ May miss subtle correlations
**Quantum Components**:
βœ… Detects complex entangled patterns
βœ… Superior in high-dimensional feature spaces
βœ… Finds correlations invisible to classical ML
❌ Currently limited by qubit count
**Our Hybrid Approach**:
- Uses classical model as the primary detector (80%)
- Quantum ensemble refines predictions (20%)
- Best of both worlds: reliability + pattern detection
**Real Impact**: The quantum component improves edge-case detection by catching fraud patterns that classical models alone would miss.""",
            "precision_recall": """**Understanding Precision & Recall** πŸ“Š
**Precision** = True Positives / (True Positives + False Positives)
- "When we flag fraud, how often are we right?"
- High precision = fewer false alarms
**Recall** = True Positives / (True Positives + False Negatives)
- "Of all actual fraud, how much do we catch?"
- High recall = fewer missed fraud cases
**The Trade-off**:
- πŸ”Ό Increase threshold β†’ Higher precision, lower recall
- πŸ”½ Decrease threshold β†’ Higher recall, lower precision
**Business Impact**:
- **Low Precision**: Customer frustration from blocked legitimate transactions
- **Low Recall**: Financial losses from undetected fraud
**Recommendation**: Balance based on your risk tolerance. Most businesses target ~85% recall with >70% precision.""",
            "threshold": """**Optimizing the Fraud Threshold** 🎯
The threshold (default: 0.5) determines when a transaction is flagged as fraud.
**When to Lower Threshold (e.g., 0.4)**:
- Recall is too low (missing fraud)
- High-value transactions at risk
- Can handle more manual reviews
**When to Raise Threshold (e.g., 0.6)**:
- Too many false positives
- Customer complaints about blocks
- Need higher precision
**Testing Approach**:
1. Start with historical data
2. Test thresholds: 0.4, 0.45, 0.5, 0.55, 0.6
3. Calculate precision/recall for each
4. Choose based on your cost model:
- Cost of fraud vs cost of false positive
**Current System**: Using 0.5 threshold with 80/20 classical/quantum weighting.""",
            "vqc_qaoa_qnn": """**Quantum Model Components Explained** βš›οΈ
**VQC - Variational Quantum Classifier (40% weight)**
- Uses parameterized quantum circuits
- Learns optimal qubit rotations during training
- Best for: Binary classification tasks
- 4 qubits, multiple rotation layers
**QAOA - Quantum Approximate Optimization (30% weight)**
- Solves optimization problems
- Finds optimal feature combinations
- Best for: Feature selection, pattern optimization
- 2 layers of mixing and cost operators
**QNN - Quantum Neural Network (30% weight)**
- Deep quantum circuits with entanglement
- Captures non-linear relationships
- Best for: Complex pattern recognition
- 3-layer architecture with strong entanglement
**Why These Three?**
Each captures different aspects of fraud patterns:
- VQC: Direct classification
- QAOA: Optimal feature weighting
- QNN: Hidden correlations""",
            "improve_accuracy": """**Improving Model Accuracy** πŸ“ˆ
**1. Data Quality**
- Ensure balanced dataset (fraud vs non-fraud)
- Handle missing values properly
- Feature normalization is applied
**2. Threshold Tuning**
- Current: 0.5 (default)
- Adjust based on precision/recall needs
- Use ROC curve analysis
**3. Feature Engineering**
- Transaction velocity (txns per hour)
- Geographic anomalies
- Merchant category patterns
- Time-based features (hour, day of week)
**4. Model Ensemble**
- Classical handles bulk cases well
- Quantum catches edge cases
- Current 80/20 split is optimized
**5. Regular Retraining**
- Fraud patterns evolve
- Retrain monthly with new data
- Monitor drift metrics
**Quick Wins**:
- Check for data imbalance
- Verify feature scaling
- Test different threshold values""",
            "false_positives": """**Reducing False Positives** 🚫
False positives occur when legitimate transactions are flagged as fraud.
**Common Causes**:
1. Threshold too low
2. Unusual but legitimate behavior
3. New customer patterns
4. Geographic false flags
**Solutions**:
**1. Raise Threshold**
- Current: 0.5 β†’ Try 0.55 or 0.6
- Trade-off: May miss some fraud
**2. Customer Profiling**
- Build normal behavior baselines
- Flag only significant deviations
**3. Velocity Checks**
- Multiple small txns vs one large
- Time-based patterns
**4. Merchant Categories**
- Trust established merchants
- Scrutinize high-risk categories
**5. Two-Stage Review**
- Score 0.5-0.7: Soft flag (monitor)
- Score >0.7: Hard flag (block)
**Business Impact**: Each false positive costs customer trust. Balance carefully.""",
            "how_it_works": """**System Architecture Overview** πŸ—οΈ
**Data Flow**:
1. Transaction arrives β†’ Feature extraction
2. Features scaled using StandardScaler
3. Parallel processing:
- Classical model (XGBoost) β†’ 80% weight
- Quantum ensemble (VQC+QAOA+QNN) β†’ 20% weight
4. Scores combined β†’ Final prediction
5. Threshold comparison β†’ Fraud/Safe label
**Tech Stack**:
- **Backend**: FastAPI (Python)
- **ML**: XGBoost, PennyLane (Quantum)
- **Frontend**: Next.js, React
- **Database**: 1.2M+ transactions
**Real-time Processing**:
- Inference time: ~50ms per transaction
- Quantum simulation on CPU (production would use QPU)
- Handles batch and streaming modes
**Models Location**: `/models/` directory
- `classical_model.joblib`: XGBoost
- `vqc_weights.npy`: VQC parameters
- `qaoa_weights.npy`: QAOA parameters
- `qnn_weights.npy`: QNN parameters"""
        }
        # System prompt with comprehensive instructions
        self.system_prompt = """You are an expert AI assistant for a Hybrid Quantum-Classical Fraud Detection System.
RESPONSE GUIDELINES:
- Be professional, concise, and actionable
- Use specific data from the current system when provided
- Explain technical concepts clearly without excessive jargon
- Focus on practical recommendations and insights
- Keep responses under 300 words
- Never fabricate statistics or provide broken formulas
YOUR EXPERTISE:
- Fraud detection performance analysis
- Quantum-classical hybrid architecture explanation
- Threshold optimization and tuning
- Model performance troubleshooting
- Pattern recognition in financial transactions
SYSTEM ARCHITECTURE:
- Hybrid Model: 80% Classical XGBoost + 20% Quantum Ensemble
- Quantum Components: VQC (40%) + QAOA (30%) + QNN (30%)
- Features: 10 classical features, 4 quantum-optimized features
- Real-time processing with configurable thresholds
KEY METRICS INTERPRETATION:
- Precision = TP/(TP+FP) - Accuracy of fraud predictions
- Recall = TP/(TP+FN) - Coverage of actual fraud cases
- High Accuracy + Low Precision = Too many false positives
- Low Recall = Missing real fraud cases
COMMON ISSUES & SOLUTIONS:
- 0% Precision: All flagged transactions are false positives β†’ Increase threshold
- 0% Recall: Missing actual fraud β†’ Decrease threshold or improve features
- Imbalanced data: Use weighted metrics and proper sampling
Always provide specific, actionable recommendations based on the current system state."""

    def _get_system_context(self, history):
        """Generate detailed context from current transaction history.

        Args:
            history: Sequence of transaction dicts. The keys read are
                ``'is_fraud'`` (true label, defaults to 0), ``'Prediction'``
                (counted as flagged only when equal to ``'Fraud'``) and the
                ``'Final_Score'``, ``'Quantum_Score'`` and
                ``'Classical_Score'`` values (each defaults to 0).

        Returns:
            A multi-line text summary (counts, confusion matrix, accuracy,
            precision, recall, F1, average scores). A fixed "ready" status
            line is returned when ``history`` is empty or ``None``.
        """
        if not history:
            return "System Status: Initialized and ready. No transactions processed yet."
        # Calculate comprehensive metrics
        total = len(history)
        true_labels = [t.get('is_fraud', 0) for t in history]
        predictions = [1 if t.get('Prediction') == 'Fraud' else 0 for t in history]
        # Confusion matrix
        pairs = list(zip(true_labels, predictions))
        tp = sum(1 for t, p in pairs if t == 1 and p == 1)
        fp = sum(1 for t, p in pairs if t == 0 and p == 1)
        fn = sum(1 for t, p in pairs if t == 1 and p == 0)
        tn = sum(1 for t, p in pairs if t == 0 and p == 0)
        # Performance metrics (total is non-zero here; the other ratios are
        # guarded because their denominators can legitimately be zero)
        accuracy = (tp + tn) / total
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        # Score analysis
        final_scores = [t.get('Final_Score', 0) for t in history]
        quantum_scores = [t.get('Quantum_Score', 0) for t in history]
        classical_scores = [t.get('Classical_Score', 0) for t in history]
        avg_final = sum(final_scores) / total
        avg_quantum = sum(quantum_scores) / total
        avg_classical = sum(classical_scores) / total
        # Fraud analysis
        flagged_count = sum(predictions)
        actual_fraud_count = sum(true_labels)
        # Generate context summary
        context = f"""
CURRENT SYSTEM ANALYSIS:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
πŸ“Š TRANSACTION OVERVIEW:
β€’ Total Processed: {total:,} transactions
β€’ Flagged as Fraud: {flagged_count} ({flagged_count/total*100:.1f}% rate)
β€’ Actual Fraud Cases: {actual_fraud_count} ({actual_fraud_count/total*100:.1f}% rate)
⚑ PERFORMANCE METRICS:
β€’ Accuracy: {accuracy*100:.1f}% (Overall correctness)
β€’ Precision: {precision*100:.1f}% (Fraud prediction accuracy)
β€’ Recall: {recall*100:.1f}% (Fraud detection coverage)
β€’ F1-Score: {f1:.3f} (Balanced performance measure)
πŸ” DETECTION BREAKDOWN:
β€’ True Positives (Correct Fraud): {tp}
β€’ False Positives (Safe β†’ Fraud): {fp} {"⚠️ HIGH!" if fp > tp * 2 else ""}
β€’ False Negatives (Fraud β†’ Safe): {fn} {"🚨 CRITICAL!" if fn > 0 else ""}
β€’ True Negatives (Correct Safe): {tn}
βš›οΈ HYBRID MODEL SCORES:
β€’ Average Final Score: {avg_final:.4f}
β€’ Classical Component: {avg_classical:.4f} (80% weight)
β€’ Quantum Ensemble: {avg_quantum:.4f} (20% weight)
🎯 THRESHOLD ANALYSIS:
β€’ Current Threshold: 0.5 (configurable)
β€’ Scores Range: {min(final_scores):.3f} - {max(final_scores):.3f}
{'β€’ RECOMMENDATION: Consider adjusting threshold based on precision/recall balance' if precision < 0.5 or recall < 0.5 else ''}
πŸ”¬ QUANTUM COMPONENT STATUS:
β€’ VQC (Pattern Recognition): Active
β€’ QAOA (Optimization): Active
β€’ QNN (Neural Processing): Active
"""
        # Extra guidance when both precision and recall are below 70%
        if precision < 0.7 and recall < 0.7:
            context += """
🎯 THRESHOLD RECOMMENDATION:
Current threshold may be suboptimal. Consider:
β€’ Lower threshold (0.45) to improve recall
β€’ Higher threshold (0.65) to improve precision
β€’ Current performance suggests class imbalance
"""
        return context

    def get_response(self, user_message, history=None):
        """Generate AI response with enhanced context and error handling.

        The method never raises for request failures: every failure path
        returns either the matching built-in answer (with a note) or a short
        status message.

        Args:
            user_message: The user's question.
            history: Optional transaction history passed to
                ``_get_system_context`` to build the prompt context.

        Returns:
            The response text to show to the user.
        """
        # Try built-in response first (always available, no rate limits)
        builtin_response = self._get_builtin_response(user_message)
        question_lower = user_message.lower() if isinstance(user_message, str) else ""

        # Rate limiting check
        current_time = time.time()
        elapsed = current_time - self.last_request_time
        if elapsed < self.min_request_interval:
            if builtin_response:
                return builtin_response + "\n\nπŸ’‘ *Response from built-in knowledge base*"
            # Round UP: truncating would tell the user to wait "0 seconds"
            # whenever less than one second remains.
            wait_time = max(1, math.ceil(self.min_request_interval - elapsed))
            return f"⏳ Please wait {wait_time} seconds between questions to avoid rate limits."

        # Validate API key
        if not self.api_key or not self.api_key.startswith("sk-or-v1-"):
            if builtin_response:
                return builtin_response + "\n\nπŸ’‘ *Response from built-in knowledge base (AI service not configured)*"
            return """πŸ€– **AI Assistant Configuration Required**
To enable intelligent fraud analysis:
1. **Get Free API Key**: Visit [OpenRouter.ai](https://openrouter.ai)
2. **Configure**: Create a `.env` file with:
```
OPENROUTER_API_KEY=sk-or-v1-your-key-here
```
3. **Restart**: The backend server
**Current Status**: Core fraud detection system is fully operational. Built-in responses available for common questions."""

        # Build context from system state
        system_context = self._get_system_context(history) if history else "System ready for analysis."
        # Construct conversation
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": f"""
CURRENT SYSTEM DATA:
{system_context}
USER QUESTION: {user_message}
Please provide a clear, specific response using the current system metrics. Focus on actionable insights and practical recommendations.
"""}
        ]
        try:
            # Stamp the attempt before sending so failed requests also count
            # towards the local rate limit.
            self.last_request_time = current_time
            # Make API request
            response = requests.post(
                self.base_url,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                    "HTTP-Referer": "https://quantum-fraud-detector.local",
                    "X-Title": "Quantum Fraud Detection System"
                },
                json={
                    "model": self.model,
                    "messages": messages,
                    "max_tokens": 400,
                    "temperature": 0.3,
                    "top_p": 0.9,
                    "frequency_penalty": 0.2,
                    "presence_penalty": 0.1
                },
                timeout=30
            )
            # Handle successful response
            if response.status_code == 200:
                result = response.json()
                ai_response = result['choices'][0]['message']['content'].strip()
                # Drop code fences from the model output
                ai_response = ai_response.replace('```', '')
                if any(keyword in question_lower for keyword in ['quantum', 'vqc', 'qaoa', 'qnn']):
                    ai_response += "\n\nπŸ’‘ **Quick Reference**: VQC = Variational Quantum Circuit, QAOA = Quantum Optimization, QNN = Quantum Neural Network"
                if any(keyword in question_lower for keyword in ['precision', 'recall', 'f1']):
                    ai_response += "\n\nπŸ“Š **Metric Tip**: Balance precision (fraud accuracy) vs recall (fraud coverage) based on business priorities."
                return ai_response
            # Handle API errors - use builtin fallback
            if response.status_code == 429:
                if builtin_response:
                    return builtin_response + "\n\nπŸ’‘ *Response from built-in knowledge base (AI rate limited)*"
                return """⏰ **Rate Limit Exceeded**
Too many requests to the AI service. Try asking about:
β€’ How fraud detection works
β€’ Quantum vs classical advantage
β€’ Precision and recall
β€’ Threshold optimization
These topics have built-in responses available!"""
            if response.status_code in [401, 402]:
                if builtin_response:
                    return builtin_response + "\n\nπŸ’‘ *Response from built-in knowledge base*"
                return """πŸ” **AI Service Issue**
API authentication failed. Built-in responses available for common questions about:
β€’ Fraud detection methodology
β€’ Quantum model components
β€’ Performance optimization"""
            if builtin_response:
                return builtin_response + "\n\nπŸ’‘ *Response from built-in knowledge base*"
            return f"⚠️ AI service returned status {response.status_code}. Try asking about fraud detection basics."
        except requests.exceptions.RequestException:
            # RequestException is the base class of Timeout and
            # ConnectionError, so it covers every transport failure.
            if builtin_response:
                return builtin_response + "\n\nπŸ’‘ *Response from built-in knowledge base (network issue)*"
            return """🌐 **Connection Issue**
Unable to reach AI service. The fraud detection system is working normally.
**Available Topics** (no network needed):
β€’ "How does fraud detection work?"
β€’ "Explain quantum advantage"
β€’ "What is precision vs recall?"
β€’ "How to reduce false positives?" """
        except Exception:
            # Deliberate last-resort boundary (e.g. a 200 response whose body
            # is not the expected JSON shape): the chatbot must not crash
            # its caller.
            if builtin_response:
                return builtin_response
            return "πŸ› οΈ An error occurred. Core fraud detection remains operational."

    def _get_builtin_response(self, user_message):
        """Match user question to built-in responses.

        Args:
            user_message: The user's question. Non-string values are treated
                as "no match".

        Returns:
            The canned response for the first pattern in
            ``_BUILTIN_PATTERNS`` that matches the lower-cased message, or
            ``None`` when nothing matches.
        """
        if not isinstance(user_message, str):
            return None
        message_lower = user_message.lower()
        for pattern, key in self._BUILTIN_PATTERNS:
            if pattern.search(message_lower):
                return self.builtin_responses.get(key)
        return None
# --- Test Function ---
def test_chatbot():
    """Test the enhanced chatbot functionality.

    Manual smoke test: builds a synthetic 100-transaction history, asks a
    fixed list of questions and prints the first 200 characters of each
    answer. Returns early (after printing a hint) when the
    ``OPENROUTER_API_KEY`` environment variable is not set.
    """
    print("πŸ§ͺ Testing Enhanced AI Fraud Detection Assistant")
    print("=" * 60)
    api_key = os.getenv("OPENROUTER_API_KEY", "")
    if not api_key:
        print("❌ No API key found. Set OPENROUTER_API_KEY environment variable.")
        return
    chatbot = AIFraudChatbot(api_key=api_key)
    # Simulate realistic transaction history
    test_history = [
        {
            'Prediction': 'Fraud' if i % 20 == 0 else 'Safe',  # 5% flagged
            'is_fraud': 1 if i % 25 == 0 else 0,  # 4% actual fraud
            'Final_Score': 0.6 + (i % 5) * 0.1,  # Varied scores
            'Quantum_Score': 0.3 + (i % 3) * 0.1,
            'Classical_Score': 0.7 + (i % 4) * 0.05,
        }
        for i in range(100)
    ]
    test_questions = [
        "Why is my precision so low?",
        "How can I improve recall performance?",
        "Explain how the quantum ensemble works",
        "What's the optimal fraud threshold?",
        "How do false positives impact my business?"
    ]
    for i, question in enumerate(test_questions, 1):
        if i > 1:
            # Wait out the chatbot's own rate limiter; a shorter pause means
            # every question after the first is answered from the built-in
            # fallback (or a "please wait" message) instead of the API.
            time.sleep(chatbot.min_request_interval)
        print(f"\nπŸ”Έ Test {i}: {question}")
        print("-" * 50)
        response = chatbot.get_response(question, test_history)
        preview = response[:200] + "..." if len(response) > 200 else response
        print(preview)
# Run the manual smoke test only when this file is executed as a script.
if __name__ == "__main__":
    test_chatbot()