"""Gradio interface for Fraud Detection Chatbot."""

import logging
import warnings
import os

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', message='.*LangChain.*')

# Disable ChromaDB telemetry to avoid errors
os.environ['ANONYMIZED_TELEMETRY'] = 'False'

import gradio as gr
from pathlib import Path
import pandas as pd

from src.data.processor import FraudDataProcessor
from src.llm.groq_client import GroqClient
from src.rag.document_loader import DocumentLoader
from src.rag.vector_store import VectorStore
from src.services.fraud_analyzer import FraudAnalyzer
from src.services.quality_scorer import ResponseQualityScorer
from src.config.config import settings

# Root logging at INFO so the initialization progress messages are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Quiet chromadb: only errors from the package logger, and only critical
# records from its telemetry sub-logger.
logging.getLogger('chromadb.telemetry').setLevel(logging.CRITICAL)
logging.getLogger('chromadb').setLevel(logging.ERROR)

# Shared component handles. The scorer is created at import time; the other
# four stay None until initialize_system() assigns them.
quality_scorer = ResponseQualityScorer()
groq_client = vector_store = fraud_analyzer = data_processor = None


def initialize_system():
    """Initialize the fraud detection system.

    Creates the Groq client, the data processor, the RAG vector store (best
    effort) and the fraud analyzer, and publishes them through the
    module-level globals that the request handlers read.

    The call is idempotent: once the fraud analyzer exists, later calls
    return immediately. This function is registered with ``demo.load``,
    which fires whenever the app is loaded in a browser, so without the
    guard every page load would rebuild the clients and add the same
    documents to the vector store again.

    Failures while setting up RAG are logged and tolerated; the analyzer is
    then created with whatever ``vector_store`` value was reached (``None``
    when no store was created). Failures while creating the Groq client,
    data processor or analyzer propagate to the caller.

    Returns:
        str: Status message for the "System Status" textbox.
    """
    global groq_client, vector_store, fraud_analyzer, data_processor

    # The analyzer is assigned last, so its presence means a previous call
    # ran to completion and nothing needs to be rebuilt.
    if fraud_analyzer is not None:
        return "✅ System initialized successfully!"

    logger.info("Initializing Fraud Detection System...")

    # Initialize Groq client
    groq_client = GroqClient()
    logger.info("✓ Groq client initialized")

    # Initialize data processor
    data_processor = FraudDataProcessor()
    logger.info("✓ Data processor initialized")

    # Setup RAG system (optional: any failure here only disables RAG)
    try:
        document_loader = DocumentLoader(
            chunk_size=settings.chunk_size,
            chunk_overlap=settings.chunk_overlap,
        )

        all_documents = []

        # Load PDF documents
        pdf_documents = document_loader.load_pdfs_from_directory(settings.pdf_dir)
        if pdf_documents:
            all_documents.extend(pdf_documents)
            logger.info(f"✓ Loaded {len(pdf_documents)} PDF documents")
        else:
            logger.warning("⚠ No PDF documents found")

        # Load CSV insights; a CSV failure must not discard the PDFs that
        # were already loaded, hence the nested try.
        csv_path = settings.train_data_path
        if csv_path.exists():
            try:
                csv_documents = document_loader.load_csv_insights(csv_path, sample_size=1050000)
                all_documents.extend(csv_documents)
                logger.info(f"✓ Loaded {len(csv_documents)} CSV insight documents")
            except Exception as e:
                logger.warning(f"⚠ Failed to load CSV insights: {e}")
        else:
            logger.warning(f"⚠ CSV file not found: {csv_path}")

        # Add all documents to vector store
        if all_documents:
            vector_store = VectorStore()
            vector_store.add_documents(all_documents)
            logger.info(f"✓ RAG system initialized with {len(all_documents)} total documents")
        else:
            logger.warning("⚠ No documents loaded for RAG system")

    except Exception as e:
        logger.warning(f"⚠ RAG setup failed: {e}")

    # Create fraud analyzer (vector_store may be None when RAG is unavailable)
    fraud_analyzer = FraudAnalyzer(
        groq_client=groq_client,
        vector_store=vector_store,
    )
    logger.info("✓ Fraud analyzer initialized")

    return "✅ System initialized successfully!"


def analyze_by_transaction_id(transaction_id: int, use_rag: bool):
    """Run a fraud analysis for a dataset transaction selected by its ID.

    Args:
        transaction_id: Identifier of the transaction; coerced with ``int()``
            before it is handed to the analyzer.
        use_rag: Forwarded to the analyzer as its ``use_rag`` flag.

    Returns:
        Markdown text with the transaction details followed by the analysis,
        or a line starting with "❌" when the system is not initialized or
        the analysis raises.
    """
    if fraud_analyzer is None:
        return "❌ System not initialized. Please wait for initialization to complete."

    try:
        outcome = fraud_analyzer.analyze_transaction(
            transaction_id=int(transaction_id),
            use_rag=use_rag,
        )
        txn = outcome['transaction']
        verdict = outcome['analysis']

        # One entry per output line; the trailing empty entry yields the
        # final newline of the rendered markdown.
        lines = [
            "### 📊 Transaction Details",
            f"**Merchant:** {txn.get('merchant', 'N/A')}",
            f"**Category:** {txn.get('category', 'N/A')}",
            f"**Amount:** ${txn.get('amt', 0):.2f}",
            f"**City:** {txn.get('city', 'N/A')}",
            f"**State:** {txn.get('state', 'N/A')}",
            "",
            "---",
            "",
            "### 🔍 Fraud Analysis",
            f"{verdict}",
            "",
        ]
        return "\n".join(lines)

    except Exception as exc:
        logger.error(f"Analysis failed: {exc}")
        return f"❌ Error: {exc}"


def analyze_by_manual_data(
    merchant: str, category: str, amount: float, city: str, state: str, use_rag: bool,
    gender: str = None, age: int = None, job: str = None, zip_code: str = None,
    city_pop: int = None, merch_lat: float = None, merch_long: float = None
):
    """Analyze fraud risk for a manually entered transaction.

    Args:
        merchant: Merchant name. A leading ``fraud_`` prefix is stripped
            before the name is sent to the analyzer; the name is displayed
            as entered.
        category: Transaction category.
        amount: Transaction amount; converted with ``float()``.
        city: Transaction city.
        state: Transaction state.
        use_rag: Forwarded to the analyzer as its ``use_rag`` flag.
        gender, age, job, zip_code, city_pop: Optional details, included
            only when truthy (so ``0`` and ``""`` are treated as absent).
        merch_lat, merch_long: Optional merchant coordinates, included
            whenever they are not ``None`` (``0.0`` is a valid value).

    Returns:
        Markdown text echoing the submitted fields followed by the analysis,
        or a line starting with "❌" when the system is not initialized or
        the analysis raises.
    """
    if fraud_analyzer is None:
        return "❌ System not initialized. Please wait for initialization to complete."

    try:
        # Strip only a leading "fraud_" marker. str.replace would also delete
        # the text from the middle of a legitimate name.
        prefix = 'fraud_'
        clean_merchant = merchant
        if merchant and merchant.startswith(prefix):
            clean_merchant = merchant[len(prefix):]

        # Convert once and reuse, so the analyzer and the display agree and
        # a numeric string does not fail at formatting time.
        amount_value = float(amount)

        transaction_data = {
            "merchant": clean_merchant,
            "category": category,
            "amt": amount_value,
            "city": city,
            "state": state,
        }

        # Add advanced fields if provided
        optional_fields = (
            ("gender", gender),
            ("age", age),
            ("job", job),
            ("zip", zip_code),
            ("city_pop", city_pop),
        )
        for key, value in optional_fields:
            if value:
                transaction_data[key] = value
        if merch_lat is not None:
            transaction_data["merch_lat"] = merch_lat
        if merch_long is not None:
            transaction_data["merch_long"] = merch_long

        result = fraud_analyzer.analyze_transaction(
            transaction_data=transaction_data,
            use_rag=use_rag,
        )

        analysis = result['analysis']

        parts = [f"""### 📊 Transaction Details
**Merchant:** {merchant}
**Category:** {category}
**Amount:** ${amount_value:.2f}
**City:** {city}
**State:** {state}
"""]

        # Add advanced fields to display if provided
        if gender or age or job:
            parts.append("\n**Cardholder Info:**\n")
            if gender:
                parts.append(f"- Gender: {gender}\n")
            if age:
                parts.append(f"- Age: {age}\n")
            if job:
                parts.append(f"- Job: {job}\n")

        if zip_code or city_pop:
            parts.append("\n**Location Details:**\n")
            if zip_code:
                parts.append(f"- ZIP: {zip_code}\n")
            if city_pop:
                parts.append(f"- City Population: {city_pop:,}\n")

        if merch_lat is not None or merch_long is not None:
            parts.append("\n**Merchant Location:**\n")
            parts.append(f"- Coordinates: ({merch_lat}, {merch_long})\n")

        parts.append(f"""
---

### 🔍 Fraud Analysis
{analysis}
""")
        return "".join(parts)

    except Exception as e:
        logger.error(f"Analysis failed: {e}")
        return f"❌ Error: {str(e)}"


def _format_rag_metadata_stats(metadatas):
    """Render document counts per source type for the RAG summary section.

    Documents whose ``type`` metadata is one of the four CSV insight types
    are counted per type; everything else is counted as a PDF document.
    Entries that are ``None`` or lack keys are tolerated and fall into the
    PDF/other bucket.
    """
    csv_labels = (
        ('fraud_pattern', 'Fraud Pattern Analysis'),
        ('merchant_profile', 'Merchant Profiles'),
        ('location_insight', 'Location Insights'),
        ('statistical_summary', 'Statistical Summaries'),
    )
    csv_counts = {doc_type: 0 for doc_type, _ in csv_labels}
    pdf_count = 0
    pdf_sources = set()

    for meta in metadatas:
        # A record stored without metadata may come back as None.
        meta = meta or {}
        doc_type = meta.get('type', 'document')
        if doc_type in csv_counts:
            csv_counts[doc_type] += 1
            continue

        # Anything that is not a CSV insight is treated as a PDF document.
        pdf_count += 1
        source = str(meta.get('source') or '')
        if source.endswith('.pdf'):
            pdf_sources.add(source)

    parts = [f"**Total Documents in RAG:** {len(metadatas):,}\n\n"]

    if pdf_count > 0:
        parts.append(f"**📄 PDF Research Documents:** {pdf_count:,}\n")
        parts.extend(f"  - {pdf}\n" for pdf in sorted(pdf_sources))
        parts.append("\n")

    csv_total = sum(csv_counts.values())
    if csv_total > 0:
        parts.append(f"**📊 CSV-Derived Insights:** {csv_total:,}\n")
        for doc_type, label in csv_labels:
            if csv_counts[doc_type] > 0:
                parts.append(f"  - {label}: {csv_counts[doc_type]}\n")

    return "".join(parts)


def get_dataset_summary():
    """Get dataset summary statistics including RAG documents.

    Returns:
        Markdown text with the transaction statistics reported by the data
        processor (total, fraud count and rate, average amount, first ten
        categories) and, when a vector store exists, a breakdown of the
        stored documents by type. Returns a line starting with "❌" when the
        data processor is not initialized or the transaction summary fails.
        A failure while reading the vector store only replaces the RAG
        breakdown with a status line.
    """
    if data_processor is None:
        return "❌ System not initialized."

    try:
        # Get transaction data summary
        summary = data_processor.get_transaction_summary()

        parts = [f"""### 📊 Transaction Dataset Summary

**Total Transactions:** {summary['total_transactions']:,}
**Fraud Cases:** {summary['fraud_count']:,}
**Fraud Rate:** {summary['fraud_percentage']:.2f}%
**Average Amount:** ${summary['average_amount']:.2f}

---

**Top Transaction Categories:**
"""]
        # Only the first ten categories are listed, in the order provided.
        for category, count in list(summary['categories'].items())[:10]:
            parts.append(f"\n- {category}: {count:,}")

        # Add RAG document summary if available
        if vector_store is not None:
            parts.append("\n\n---\n\n### 📚 RAG Knowledge Base\n\n")

            try:
                # NOTE(review): this reaches into the private `_collection`
                # attribute of the wrapped store; confirm there is no public
                # accessor before relying on it across library upgrades.
                all_docs = vector_store.vector_store._collection.get()

                metadatas = None
                if all_docs and 'metadatas' in all_docs:
                    metadatas = all_docs['metadatas']

                if metadatas is not None:
                    parts.append(_format_rag_metadata_stats(metadatas))
                else:
                    parts.append("**Status:** RAG system initialized but no document metadata available.")

            except Exception as e:
                logger.warning(f"Could not retrieve RAG document stats: {e}")
                parts.append("**Status:** RAG system active (document count unavailable)")

        return "".join(parts)

    except Exception as e:
        logger.error(f"Summary failed: {e}")
        return f"❌ Error: {str(e)}"


_NOT_INITIALIZED_MESSAGE = "❌ System not initialized. Please wait for initialization to complete."

_CHAT_SYSTEM_MESSAGE = """You are an expert fraud detection analyst. Help users understand fraud patterns, detection methods, and transaction analysis. 

IMPORTANT CITATION RULES:
- When using information from the provided context sources, you MUST add an inline citation immediately after the relevant sentence or paragraph.
- Format citations as: [Source X] where X is the source number from the context.
- Place citations at the end of sentences that use information from that source.
- You can cite multiple sources in one paragraph if needed: [Source 1, Source 2]
- Be specific and reference the data when using information from sources.

TRANSACTION ANALYSIS:
- If transaction details are provided, analyze them thoroughly.
- Note: Ignore "fraud_" prefix in merchant names; it is an artifact of the synthetic dataset and NOT an indicator of fraud.
- Compare transaction characteristics against known fraud patterns.
- Provide a clear fraud risk assessment (Low/Medium/High).
- Explain your reasoning with specific indicators.

Example:
"Online gaming merchants often experience higher fraud rates due to card-not-present transactions. [Source 1] The average fraud rate in this category is 5.2%. [Source 2]"

Provide clear, actionable insights with proper inline citations."""


def _format_transaction_context(transaction_id, transaction):
    """Render one transaction record as the markdown block added to the prompt."""
    get = transaction.get
    fraud_label = '🚨 FRAUD' if get('is_fraud', 0) == 1 else '✅ LEGITIMATE'
    # NOTE(review): cardholder name, address, date of birth and the card
    # number become part of the prompt passed to groq_client.invoke; confirm
    # that is acceptable for this dataset or mask them.
    lines = [
        f"\n\n**Transaction ID {transaction_id} Details:**\n",
        f"- **Transaction Number:** {get('trans_num', 'N/A')}\n",
        f"- **Date/Time:** {get('trans_date_trans_time', 'N/A')}\n",
        f"- **Merchant:** {get('merchant', 'N/A')}\n",
        f"- **Category:** {get('category', 'N/A')}\n",
        f"- **Amount:** ${get('amt', 0):.2f}\n",
        f"- **Location:** {get('city', 'N/A')}, {get('state', 'N/A')}\n",
        f"- **Merchant Coordinates:** ({get('merch_lat', 'N/A')}, {get('merch_long', 'N/A')})\n",
        "\n**Cardholder Information:**\n",
        f"- **Name:** {get('first', 'N/A')} {get('last', 'N/A')}\n",
        f"- **Gender:** {get('gender', 'N/A')}\n",
        f"- **Date of Birth:** {get('dob', 'N/A')}\n",
        f"- **Job:** {get('job', 'N/A')}\n",
        f"- **Street:** {get('street', 'N/A')}\n",
        f"- **City/State/ZIP:** {get('city', 'N/A')}, {get('state', 'N/A')} {get('zip', 'N/A')}\n",
        f"- **Cardholder Coordinates:** ({get('lat', 'N/A')}, {get('long', 'N/A')})\n",
        f"- **City Population:** {get('city_pop', 'N/A')}\n",
        "\n**Card Information:**\n",
        f"- **Card Number:** {get('cc_num', 'N/A')}\n",
        "\n**Fraud Status:**\n",
        f"- **Actual Status:** {fraud_label}\n",
    ]
    return "".join(lines)


def _format_source_reference(index, doc):
    """Describe one retrieved document for the source reference list."""
    meta = doc.metadata
    # CSV-derived insight types map to (label, metadata key shown in brackets).
    csv_kinds = {
        'fraud_pattern': ('Fraud Pattern Analysis', 'category'),
        'statistical_summary': ('Statistical Summary', 'scope'),
        'merchant_profile': ('Merchant Profile', 'merchant'),
        'location_insight': ('Location Analysis', 'state'),
    }
    doc_type = meta.get('type', 'document')
    if doc_type in csv_kinds:
        label, detail_key = csv_kinds[doc_type]
        return f"Source {index}: CSV Data - {label} ({meta.get(detail_key, 'N/A')})"

    # Anything else is treated as a PDF document.
    source_file = meta.get('source', 'Unknown')
    page_num = meta.get('page', 'N/A')
    if page_num != 'N/A':
        return f"Source {index}: {source_file}, Page {page_num}"
    return f"Source {index}: {source_file}"


def chat_with_fraud_expert(message: str, history: list, use_rag: bool):
    """Chat with the fraud detection expert.

    Builds a prompt from the user message, optional details of a transaction
    referenced as "transaction [id] <number>", and optional RAG context,
    sends it to the LLM, and appends the scored answer to the history.

    Every return path uses the same history format: a list of
    ``{"role": ..., "content": ...}`` dicts.

    Args:
        message: The user's question.
        history: Existing chat history; ``None`` is treated as empty. On the
            normal and error paths the list is extended in place.
        use_rag: When true and a vector store exists, the three most similar
            documents are added to the prompt and listed as sources.

    Returns:
        list: The chat history including the new user and assistant turns.
    """
    if history is None:
        history = []

    if groq_client is None:
        # Same message format as the normal path, so the chatbot component
        # never receives a mix of pair lists and role/content dicts.
        return history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": _NOT_INITIALIZED_MESSAGE},
        ]

    try:
        # Check if message is asking about a specific transaction ID
        import re
        transaction_query = re.search(r'transaction\s+(?:id\s+)?(\d+)', message.lower())
        transaction_context = ""

        if transaction_query and data_processor is not None:
            transaction_id = int(transaction_query.group(1))
            try:
                # NOTE(review): get_transaction_summary is called without
                # arguments in get_dataset_summary and treated there as a
                # dataset-wide summary; confirm it also supports lookup by
                # transaction ID and returns a per-transaction mapping.
                transaction = data_processor.get_transaction_summary(transaction_id)
                # Built in one step so a failure cannot leave a half-written
                # context in the prompt.
                transaction_context = _format_transaction_context(transaction_id, transaction)
                logger.info(f"Found transaction {transaction_id} for chat query")
            except ValueError as e:
                transaction_context = f"\n\n**Note:** {str(e)}\n"
            except Exception as e:
                logger.warning(f"Could not fetch transaction {transaction_id}: {e}")

        # If RAG is enabled and vector store is available, get relevant context
        context = ""
        source_references = []

        if use_rag and vector_store is not None:
            docs = vector_store.similarity_search(message, k=3)
            if docs:
                context = "\n\nRelevant context from fraud detection documents:\n"
                for i, doc in enumerate(docs, 1):
                    # Only the first 500 characters of each document are
                    # quoted, tagged with the number the model must cite.
                    context += f"\n[Source {i}] {doc.page_content[:500]}...\n"
                    source_references.append(_format_source_reference(i, doc))

        # Create prompt with transaction data and context
        full_prompt = message
        if transaction_context:
            full_prompt = f"{message}\n{transaction_context}"
        if context:
            full_prompt = f"{full_prompt}\n{context}"

        # Get response from LLM
        response = groq_client.invoke(
            prompt=full_prompt,
            system_message=_CHAT_SYSTEM_MESSAGE,
        )

        # Score response quality
        score_result = quality_scorer.score_response(
            response=response,
            query=message,
            has_rag=use_rag and vector_store is not None,
            sources=source_references,
        )

        # Add quality score display
        response += quality_scorer.format_score_display(score_result)

        # Add source reference list at the end
        if source_references:
            response += "\n**📚 Source References:**\n"
            for ref in source_references:
                response += f"\n- {ref}"

        # Log quality score
        logger.info(f"Response quality score: {score_result['overall_score']}/100 (Grade: {score_result['grade']})")

        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response})
        return history

    except Exception as e:
        logger.error(f"Chat failed: {e}")
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
        return history


# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks application and return it (not launched).

    The layout is a status textbox followed by four tabs: expert chat,
    analysis by transaction ID, analysis of a manually entered transaction,
    and the dataset summary. Each tab's button is wired to the matching
    module-level handler, and ``initialize_system`` is registered to run when
    the page loads, writing its result into the status textbox.
    """
    soft_theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="slate",
        font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
    )

    # Values offered in the manual-transaction category dropdown.
    category_choices = [
        "grocery_pos", "gas_transport", "misc_net",
        "shopping_net", "shopping_pos", "entertainment",
        "food_dining", "personal_care", "health_fitness",
        "travel", "kids_pets", "home"
    ]

    # Sample questions shown below the chat box.
    example_questions = [
        "What are common indicators of credit card fraud?",
        "How can I detect unusual transaction patterns?",
        "What are fraud patterns in grocery transactions?",
        "Which merchants have high fraud rates?",
        "What states have elevated fraud activity?",
    ]

    with gr.Blocks(
        theme=soft_theme,
        title="Fraud Detection Chatbot",
        css="""
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
        
        * {
            font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif !important;
        }
        
        .gradio-container {
            max-width: 1200px !important;
        }
        
        h1, h2, h3, h4, h5, h6 {
            font-weight: 600 !important;
        }
        
        .markdown-text {
            font-size: 15px !important;
            line-height: 1.6 !important;
        }
        
        button {
            font-weight: 500 !important;
        }
        """
    ) as demo:

        gr.Markdown("""
        # 🛡️ Fraud Detection Chatbot
        
        AI-powered fraud detection system using LangChain, Groq, and RAG (Retrieval Augmented Generation).
        """)

        # Read-only status line, filled in by the load handler at the bottom.
        with gr.Row():
            init_status = gr.Textbox(
                label="System Status", value="Initializing...", interactive=False,
            )

        with gr.Tabs():

            # --- Tab 1: free-form chat ---------------------------------
            with gr.Tab("💬 Chat with Fraud Expert"):
                gr.Markdown("""
                Ask questions about fraud detection, transaction patterns, or get expert advice.
                """)

                with gr.Row():
                    chat_use_rag = gr.Checkbox(
                        label="Use RAG (Enhanced with fraud detection documents + CSV data)",
                        value=True,
                    )

                chatbot = gr.Chatbot(label="Fraud Detection Expert", height=500)

                with gr.Row():
                    chat_input = gr.Textbox(
                        label="Your Question",
                        placeholder="Ask about fraud detection, transaction analysis, etc...",
                        scale=4,
                    )
                    chat_submit = gr.Button("Send", variant="primary", scale=1)

                chat_clear = gr.Button("Clear Chat")

                gr.Examples(examples=example_questions, inputs=chat_input)

            # --- Tab 2: lookup by dataset transaction ID ----------------
            with gr.Tab("🔍 Analyze by Transaction ID"):
                gr.Markdown("""
                Analyze a specific transaction from the dataset by its ID.
                """)

                txn_id_input = gr.Number(label="Transaction ID", value=0, precision=0)
                txn_id_use_rag = gr.Checkbox(label="Use RAG (Enhanced analysis)", value=True)
                txn_id_submit = gr.Button("Analyze Transaction", variant="primary")

                txn_id_output = gr.Markdown(label="Analysis Result")

            # --- Tab 3: manually entered transaction --------------------
            with gr.Tab("✍️ Analyze Manual Transaction"):
                gr.Markdown("""
                Enter transaction details manually for fraud analysis.
                """)

                # Required fields
                gr.Markdown("### Basic Transaction Information")
                manual_merchant = gr.Textbox(
                    label="Merchant Name", placeholder="e.g., Amazon, Walmart",
                )
                manual_category = gr.Dropdown(
                    label="Category", choices=category_choices, value="grocery_pos",
                )
                manual_amount = gr.Number(label="Amount ($)", value=100.0)
                manual_city = gr.Textbox(label="City", placeholder="e.g., Jakarta")
                manual_state = gr.Textbox(label="State", placeholder="e.g., DKI")

                # Optional fields, collapsed by default
                with gr.Accordion("🔧 Advanced Fields (Optional)", open=False):
                    gr.Markdown("*Provide additional details for more accurate fraud analysis*")

                    with gr.Row():
                        manual_gender = gr.Radio(
                            label="Cardholder Gender", choices=["M", "F"], value="M",
                        )
                        manual_age = gr.Number(
                            label="Cardholder Age", value=35, precision=0,
                        )

                    manual_job = gr.Textbox(
                        label="Cardholder Job", placeholder="e.g., Engineer, Teacher",
                    )

                    with gr.Row():
                        manual_zip = gr.Textbox(
                            label="ZIP Code", placeholder="e.g., 12345",
                        )
                        manual_city_pop = gr.Number(
                            label="City Population", value=100000, precision=0,
                        )

                    with gr.Row():
                        manual_merch_lat = gr.Number(label="Merchant Latitude", value=0.0)
                        manual_merch_long = gr.Number(label="Merchant Longitude", value=0.0)

                manual_use_rag = gr.Checkbox(label="Use RAG (Enhanced analysis)", value=True)
                manual_submit = gr.Button("Analyze Transaction", variant="primary")

                manual_output = gr.Markdown(label="Analysis Result")

            # --- Tab 4: dataset statistics ------------------------------
            with gr.Tab("📊 Dataset Summary"):
                gr.Markdown("""
                View statistics and insights from the fraud detection dataset.
                """)

                summary_button = gr.Button("Get Dataset Summary", variant="primary")
                summary_output = gr.Markdown(label="Summary")

        # ---- Event wiring ----------------------------------------------
        def chat_fn(message, history, use_rag):
            return chat_with_fraud_expert(message, history, use_rag)

        # The Send button and pressing Enter in the textbox behave the same:
        # run the chat handler, then empty the input box.
        for register in (chat_submit.click, chat_input.submit):
            register(
                fn=chat_fn,
                inputs=[chat_input, chatbot, chat_use_rag],
                outputs=chatbot,
            ).then(
                lambda: "",
                outputs=chat_input,
            )

        chat_clear.click(lambda: [], outputs=chatbot)

        txn_id_submit.click(
            fn=analyze_by_transaction_id,
            inputs=[txn_id_input, txn_id_use_rag],
            outputs=txn_id_output,
        )

        # Input order must match the positional parameters of
        # analyze_by_manual_data.
        manual_inputs = [
            manual_merchant,
            manual_category,
            manual_amount,
            manual_city,
            manual_state,
            manual_use_rag,
            manual_gender,
            manual_age,
            manual_job,
            manual_zip,
            manual_city_pop,
            manual_merch_lat,
            manual_merch_long,
        ]
        manual_submit.click(
            fn=analyze_by_manual_data,
            inputs=manual_inputs,
            outputs=manual_output,
        )

        summary_button.click(fn=get_dataset_summary, outputs=summary_output)

        # Run system initialization when the page loads.
        demo.load(fn=initialize_system, outputs=init_status)

    return demo


if __name__ == "__main__":
    # Build the UI and serve it on all network interfaces at port 7860,
    # without requesting a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = create_interface()
    demo.launch(**launch_options)