"""Gradio interface for Fraud Detection Chatbot.""" import logging import warnings import os # Suppress warnings for cleaner output warnings.filterwarnings('ignore', category=FutureWarning) warnings.filterwarnings('ignore', category=DeprecationWarning) warnings.filterwarnings('ignore', category=UserWarning) warnings.filterwarnings('ignore', message='.*LangChain.*') # Disable ChromaDB telemetry to avoid errors os.environ['ANONYMIZED_TELEMETRY'] = 'False' import gradio as gr from pathlib import Path import pandas as pd from src.data.processor import FraudDataProcessor from src.llm.groq_client import GroqClient from src.rag.document_loader import DocumentLoader from src.rag.vector_store import VectorStore from src.services.fraud_analyzer import FraudAnalyzer from src.services.quality_scorer import ResponseQualityScorer from src.config.config import settings logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Suppress chromadb logging logging.getLogger('chromadb').setLevel(logging.ERROR) logging.getLogger('chromadb.telemetry').setLevel(logging.CRITICAL) # Initialize components globally groq_client = None vector_store = None fraud_analyzer = None data_processor = None quality_scorer = ResponseQualityScorer() def initialize_system(): """Initialize the fraud detection system.""" global groq_client, vector_store, fraud_analyzer, data_processor logger.info("Initializing Fraud Detection System...") # Initialize Groq client groq_client = GroqClient() logger.info("✓ Groq client initialized") # Initialize data processor data_processor = FraudDataProcessor() logger.info("✓ Data processor initialized") # Setup RAG system try: document_loader = DocumentLoader( chunk_size=settings.chunk_size, chunk_overlap=settings.chunk_overlap, ) all_documents = [] # Load PDF documents pdf_documents = document_loader.load_pdfs_from_directory(settings.pdf_dir) if pdf_documents: all_documents.extend(pdf_documents) logger.info(f"✓ Loaded {len(pdf_documents)} PDF documents") else: logger.warning("⚠ No PDF documents found") # Load CSV insights csv_path = settings.train_data_path if csv_path.exists(): try: csv_documents = document_loader.load_csv_insights(csv_path, sample_size=1050000) all_documents.extend(csv_documents) logger.info(f"✓ Loaded {len(csv_documents)} CSV insight documents") except Exception as e: logger.warning(f"⚠ Failed to load CSV insights: {e}") else: logger.warning(f"⚠ CSV file not found: {csv_path}") # Add all documents to vector store if all_documents: vector_store = VectorStore() vector_store.add_documents(all_documents) logger.info(f"✓ RAG system initialized with {len(all_documents)} total documents") else: logger.warning("⚠ No documents loaded for RAG system") except Exception as e: logger.warning(f"⚠ RAG setup failed: {e}") # Create fraud analyzer fraud_analyzer = FraudAnalyzer( groq_client=groq_client, vector_store=vector_store, ) logger.info("✓ Fraud analyzer initialized") return "✅ System initialized successfully!" def analyze_by_transaction_id(transaction_id: int, use_rag: bool): """Analyze fraud by transaction ID.""" if fraud_analyzer is None: return "❌ System not initialized. Please wait for initialization to complete." try: transaction_id = int(transaction_id) result = fraud_analyzer.analyze_transaction( transaction_id=transaction_id, use_rag=use_rag, ) # Format the response transaction = result['transaction'] analysis = result['analysis'] response = f"""### 📊 Transaction Details **Merchant:** {transaction.get('merchant', 'N/A')} **Category:** {transaction.get('category', 'N/A')} **Amount:** ${transaction.get('amt', 0):.2f} **City:** {transaction.get('city', 'N/A')} **State:** {transaction.get('state', 'N/A')} --- ### 🔍 Fraud Analysis {analysis} """ return response except Exception as e: logger.error(f"Analysis failed: {e}") return f"❌ Error: {str(e)}" def analyze_by_manual_data( merchant: str, category: str, amount: float, city: str, state: str, use_rag: bool, gender: str = None, age: int = None, job: str = None, zip_code: str = None, city_pop: int = None, merch_lat: float = None, merch_long: float = None ): """Analyze fraud by manual transaction data.""" if fraud_analyzer is None: return "❌ System not initialized. Please wait for initialization to complete." try: # Clean merchant name from prefix if present clean_merchant = merchant.replace('fraud_', '') if merchant else merchant transaction_data = { "merchant": clean_merchant, "category": category, "amt": float(amount), "city": city, "state": state, } # Add advanced fields if provided if gender: transaction_data["gender"] = gender if age: transaction_data["age"] = age if job: transaction_data["job"] = job if zip_code: transaction_data["zip"] = zip_code if city_pop: transaction_data["city_pop"] = city_pop if merch_lat is not None: transaction_data["merch_lat"] = merch_lat if merch_long is not None: transaction_data["merch_long"] = merch_long result = fraud_analyzer.analyze_transaction( transaction_data=transaction_data, use_rag=use_rag, ) analysis = result['analysis'] response = f"""### 📊 Transaction Details **Merchant:** {merchant} **Category:** {category} **Amount:** ${amount:.2f} **City:** {city} **State:** {state} """ # Add advanced fields to display if provided if gender or age or job: response += "\n**Cardholder Info:**\n" if gender: response += f"- Gender: {gender}\n" if age: response += f"- Age: {age}\n" if job: response += f"- Job: {job}\n" if zip_code or city_pop: response += "\n**Location Details:**\n" if zip_code: response += f"- ZIP: {zip_code}\n" if city_pop: response += f"- City Population: {city_pop:,}\n" if merch_lat is not None or merch_long is not None: response += "\n**Merchant Location:**\n" response += f"- Coordinates: ({merch_lat}, {merch_long})\n" response += f""" --- ### 🔍 Fraud Analysis {analysis} """ return response except Exception as e: logger.error(f"Analysis failed: {e}") return f"❌ Error: {str(e)}" def get_dataset_summary(): """Get dataset summary statistics including RAG documents.""" if data_processor is None: return "❌ System not initialized." try: # Get transaction data summary summary = data_processor.get_transaction_summary() response = f"""### 📊 Transaction Dataset Summary **Total Transactions:** {summary['total_transactions']:,} **Fraud Cases:** {summary['fraud_count']:,} **Fraud Rate:** {summary['fraud_percentage']:.2f}% **Average Amount:** ${summary['average_amount']:.2f} --- **Top Transaction Categories:** """ for category, count in list(summary['categories'].items())[:10]: response += f"\n- {category}: {count:,}" # Add RAG document summary if available if vector_store is not None: response += "\n\n---\n\n### 📚 RAG Knowledge Base\n\n" # Count documents by type try: # Get all documents from vector store all_docs = vector_store.vector_store._collection.get() if all_docs and 'metadatas' in all_docs: metadatas = all_docs['metadatas'] # Count by source type pdf_count = 0 csv_pattern_count = 0 csv_merchant_count = 0 csv_location_count = 0 csv_stats_count = 0 pdf_sources = set() for meta in metadatas: doc_type = meta.get('type', 'document') source = meta.get('source', '') if doc_type == 'fraud_pattern': csv_pattern_count += 1 elif doc_type == 'merchant_profile': csv_merchant_count += 1 elif doc_type == 'location_insight': csv_location_count += 1 elif doc_type == 'statistical_summary': csv_stats_count += 1 else: # PDF document pdf_count += 1 if source.endswith('.pdf'): pdf_sources.add(source) response += f"**Total Documents in RAG:** {len(metadatas):,}\n\n" if pdf_count > 0: response += f"**📄 PDF Research Documents:** {pdf_count:,}\n" for pdf in sorted(pdf_sources): response += f" - {pdf}\n" response += "\n" csv_total = csv_pattern_count + csv_merchant_count + csv_location_count + csv_stats_count if csv_total > 0: response += f"**📊 CSV-Derived Insights:** {csv_total:,}\n" if csv_pattern_count > 0: response += f" - Fraud Pattern Analysis: {csv_pattern_count}\n" if csv_merchant_count > 0: response += f" - Merchant Profiles: {csv_merchant_count}\n" if csv_location_count > 0: response += f" - Location Insights: {csv_location_count}\n" if csv_stats_count > 0: response += f" - Statistical Summaries: {csv_stats_count}\n" else: response += "**Status:** RAG system initialized but no document metadata available." except Exception as e: logger.warning(f"Could not retrieve RAG document stats: {e}") response += "**Status:** RAG system active (document count unavailable)" return response except Exception as e: logger.error(f"Summary failed: {e}") return f"❌ Error: {str(e)}" def chat_with_fraud_expert(message: str, history: list, use_rag: bool): """Chat with fraud detection expert.""" if groq_client is None: return history + [[message, "❌ System not initialized. Please wait for initialization to complete."]] try: # Check if message is asking about a specific transaction ID import re transaction_query = re.search(r'transaction\s+(?:id\s+)?(\d+)', message.lower()) transaction_context = "" if transaction_query and data_processor is not None: transaction_id = int(transaction_query.group(1)) try: # Get transaction data transaction = data_processor.get_transaction_summary(transaction_id) # Format transaction details with all relevant columns transaction_context = f"\n\n**Transaction ID {transaction_id} Details:**\n" transaction_context += f"- **Transaction Number:** {transaction.get('trans_num', 'N/A')}\n" transaction_context += f"- **Date/Time:** {transaction.get('trans_date_trans_time', 'N/A')}\n" transaction_context += f"- **Merchant:** {transaction.get('merchant', 'N/A')}\n" transaction_context += f"- **Category:** {transaction.get('category', 'N/A')}\n" transaction_context += f"- **Amount:** ${transaction.get('amt', 0):.2f}\n" transaction_context += f"- **Location:** {transaction.get('city', 'N/A')}, {transaction.get('state', 'N/A')}\n" transaction_context += f"- **Merchant Coordinates:** ({transaction.get('merch_lat', 'N/A')}, {transaction.get('merch_long', 'N/A')})\n" transaction_context += f"\n**Cardholder Information:**\n" transaction_context += f"- **Name:** {transaction.get('first', 'N/A')} {transaction.get('last', 'N/A')}\n" transaction_context += f"- **Gender:** {transaction.get('gender', 'N/A')}\n" transaction_context += f"- **Date of Birth:** {transaction.get('dob', 'N/A')}\n" transaction_context += f"- **Job:** {transaction.get('job', 'N/A')}\n" transaction_context += f"- **Street:** {transaction.get('street', 'N/A')}\n" transaction_context += f"- **City/State/ZIP:** {transaction.get('city', 'N/A')}, {transaction.get('state', 'N/A')} {transaction.get('zip', 'N/A')}\n" transaction_context += f"- **Cardholder Coordinates:** ({transaction.get('lat', 'N/A')}, {transaction.get('long', 'N/A')})\n" transaction_context += f"- **City Population:** {transaction.get('city_pop', 'N/A')}\n" transaction_context += f"\n**Card Information:**\n" transaction_context += f"- **Card Number:** {transaction.get('cc_num', 'N/A')}\n" transaction_context += f"\n**Fraud Status:**\n" transaction_context += f"- **Actual Status:** {'🚨 FRAUD' if transaction.get('is_fraud', 0) == 1 else '✅ LEGITIMATE'}\n" logger.info(f"Found transaction {transaction_id} for chat query") except ValueError as e: transaction_context = f"\n\n**Note:** {str(e)}\n" except Exception as e: logger.warning(f"Could not fetch transaction {transaction_id}: {e}") # If RAG is enabled and vector store is available, get relevant context context = "" source_references = [] if use_rag and vector_store is not None: docs = vector_store.similarity_search(message, k=3) if docs: context = "\n\nRelevant context from fraud detection documents:\n" for i, doc in enumerate(docs, 1): # Add context with source number context += f"\n[Source {i}] {doc.page_content[:500]}...\n" # Collect source information for reference list source_file = doc.metadata.get('source', 'Unknown') page_num = doc.metadata.get('page', 'N/A') doc_type = doc.metadata.get('type', 'document') # Format source info if doc_type == 'fraud_pattern': category = doc.metadata.get('category', 'N/A') source_references.append(f"Source {i}: CSV Data - Fraud Pattern Analysis ({category})") elif doc_type == 'statistical_summary': scope = doc.metadata.get('scope', 'N/A') source_references.append(f"Source {i}: CSV Data - Statistical Summary ({scope})") elif doc_type == 'merchant_profile': merchant = doc.metadata.get('merchant', 'N/A') source_references.append(f"Source {i}: CSV Data - Merchant Profile ({merchant})") elif doc_type == 'location_insight': state = doc.metadata.get('state', 'N/A') source_references.append(f"Source {i}: CSV Data - Location Analysis ({state})") else: # PDF document if page_num != 'N/A': source_references.append(f"Source {i}: {source_file}, Page {page_num}") else: source_references.append(f"Source {i}: {source_file}") # Create prompt with transaction data and context full_prompt = message if transaction_context: full_prompt = f"{message}\n{transaction_context}" if context: full_prompt = f"{full_prompt}\n{context}" # Enhanced system message with inline citation instructions system_message = """You are an expert fraud detection analyst. Help users understand fraud patterns, detection methods, and transaction analysis. IMPORTANT CITATION RULES: - When using information from the provided context sources, you MUST add an inline citation immediately after the relevant sentence or paragraph. - Format citations as: [Source X] where X is the source number from the context. - Place citations at the end of sentences that use information from that source. - You can cite multiple sources in one paragraph if needed: [Source 1, Source 2] - Be specific and reference the data when using information from sources. TRANSACTION ANALYSIS: - If transaction details are provided, analyze them thoroughly. - Note: Ignore "fraud_" prefix in merchant names; it is an artifact of the synthetic dataset and NOT an indicator of fraud. - Compare transaction characteristics against known fraud patterns. - Provide a clear fraud risk assessment (Low/Medium/High). - Explain your reasoning with specific indicators. Example: "Online gaming merchants often experience higher fraud rates due to card-not-present transactions. [Source 1] The average fraud rate in this category is 5.2%. [Source 2]" Provide clear, actionable insights with proper inline citations.""" # Get response from LLM response = groq_client.invoke( prompt=full_prompt, system_message=system_message, ) # Score response quality score_result = quality_scorer.score_response( response=response, query=message, has_rag=use_rag and vector_store is not None, sources=source_references, ) # Add quality score display quality_display = quality_scorer.format_score_display(score_result) response += quality_display # Add source reference list at the end if source_references: response += "\n**📚 Source References:**\n" for ref in source_references: response += f"\n- {ref}" # Log quality score logger.info(f"Response quality score: {score_result['overall_score']}/100 (Grade: {score_result['grade']})") history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": response}) return history except Exception as e: logger.error(f"Chat failed: {e}") history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": f"❌ Error: {str(e)}"}) return history # Create Gradio interface def create_interface(): """Create the Gradio interface.""" with gr.Blocks( theme=gr.themes.Soft( primary_hue="blue", secondary_hue="slate", font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"], ), title="Fraud Detection Chatbot", css=""" @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'); * { font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif !important; } .gradio-container { max-width: 1200px !important; } h1, h2, h3, h4, h5, h6 { font-weight: 600 !important; } .markdown-text { font-size: 15px !important; line-height: 1.6 !important; } button { font-weight: 500 !important; } """ ) as demo: gr.Markdown(""" # 🛡️ Fraud Detection Chatbot AI-powered fraud detection system using LangChain, Groq, and RAG (Retrieval Augmented Generation). """) # System status with gr.Row(): init_status = gr.Textbox( label="System Status", value="Initializing...", interactive=False, ) # Tabs for different functionalities with gr.Tabs(): # Tab 1: Chat with Expert with gr.Tab("💬 Chat with Fraud Expert"): gr.Markdown(""" Ask questions about fraud detection, transaction patterns, or get expert advice. """) with gr.Row(): chat_use_rag = gr.Checkbox( label="Use RAG (Enhanced with fraud detection documents + CSV data)", value=True, ) chatbot = gr.Chatbot( label="Fraud Detection Expert", height=500, ) with gr.Row(): chat_input = gr.Textbox( label="Your Question", placeholder="Ask about fraud detection, transaction analysis, etc...", scale=4, ) chat_submit = gr.Button("Send", variant="primary", scale=1) chat_clear = gr.Button("Clear Chat") # Chat examples gr.Examples( examples=[ "What are common indicators of credit card fraud?", "How can I detect unusual transaction patterns?", "What are fraud patterns in grocery transactions?", "Which merchants have high fraud rates?", "What states have elevated fraud activity?", ], inputs=chat_input, ) # Tab 2: Analyze by Transaction ID with gr.Tab("🔍 Analyze by Transaction ID"): gr.Markdown(""" Analyze a specific transaction from the dataset by its ID. """) txn_id_input = gr.Number( label="Transaction ID", value=0, precision=0, ) txn_id_use_rag = gr.Checkbox( label="Use RAG (Enhanced analysis)", value=True, ) txn_id_submit = gr.Button("Analyze Transaction", variant="primary") txn_id_output = gr.Markdown(label="Analysis Result") # Tab 3: Analyze Manual Transaction with gr.Tab("✍️ Analyze Manual Transaction"): gr.Markdown(""" Enter transaction details manually for fraud analysis. """) # Basic Fields gr.Markdown("### Basic Transaction Information") manual_merchant = gr.Textbox( label="Merchant Name", placeholder="e.g., Amazon, Walmart", ) manual_category = gr.Dropdown( label="Category", choices=[ "grocery_pos", "gas_transport", "misc_net", "shopping_net", "shopping_pos", "entertainment", "food_dining", "personal_care", "health_fitness", "travel", "kids_pets", "home" ], value="grocery_pos", ) manual_amount = gr.Number( label="Amount ($)", value=100.0, ) manual_city = gr.Textbox( label="City", placeholder="e.g., Jakarta", ) manual_state = gr.Textbox( label="State", placeholder="e.g., DKI", ) # Advanced Fields (Accordion) with gr.Accordion("🔧 Advanced Fields (Optional)", open=False): gr.Markdown("*Provide additional details for more accurate fraud analysis*") with gr.Row(): manual_gender = gr.Radio( label="Cardholder Gender", choices=["M", "F"], value="M", ) manual_age = gr.Number( label="Cardholder Age", value=35, precision=0, ) manual_job = gr.Textbox( label="Cardholder Job", placeholder="e.g., Engineer, Teacher", ) with gr.Row(): manual_zip = gr.Textbox( label="ZIP Code", placeholder="e.g., 12345", ) manual_city_pop = gr.Number( label="City Population", value=100000, precision=0, ) with gr.Row(): manual_merch_lat = gr.Number( label="Merchant Latitude", value=0.0, ) manual_merch_long = gr.Number( label="Merchant Longitude", value=0.0, ) manual_use_rag = gr.Checkbox( label="Use RAG (Enhanced analysis)", value=True, ) manual_submit = gr.Button("Analyze Transaction", variant="primary") manual_output = gr.Markdown(label="Analysis Result") # Tab 4: Dataset Summary with gr.Tab("📊 Dataset Summary"): gr.Markdown(""" View statistics and insights from the fraud detection dataset. """) summary_button = gr.Button("Get Dataset Summary", variant="primary") summary_output = gr.Markdown(label="Summary") # Event handlers def chat_fn(message, history, use_rag): return chat_with_fraud_expert(message, history, use_rag) chat_submit.click( fn=chat_fn, inputs=[chat_input, chatbot, chat_use_rag], outputs=chatbot, ).then( lambda: "", outputs=chat_input, ) chat_input.submit( fn=chat_fn, inputs=[chat_input, chatbot, chat_use_rag], outputs=chatbot, ).then( lambda: "", outputs=chat_input, ) chat_clear.click( lambda: [], outputs=chatbot, ) txn_id_submit.click( fn=analyze_by_transaction_id, inputs=[txn_id_input, txn_id_use_rag], outputs=txn_id_output, ) manual_submit.click( fn=analyze_by_manual_data, inputs=[ manual_merchant, manual_category, manual_amount, manual_city, manual_state, manual_use_rag, manual_gender, manual_age, manual_job, manual_zip, manual_city_pop, manual_merch_lat, manual_merch_long, ], outputs=manual_output, ) summary_button.click( fn=get_dataset_summary, outputs=summary_output, ) # Initialize system on load demo.load( fn=initialize_system, outputs=init_status, ) return demo if __name__ == "__main__": demo = create_interface() demo.launch( server_name="0.0.0.0", server_port=7860, share=False, )