"""Non-QM mortgage glossary chatbot.

Retrieval-augmented Gradio app: user questions are embedded with OpenAI,
matched against a FAISS index of glossary chunks, and answered by an
OpenRouter-hosted LLM.  Every answer ends with a mandatory compliance
disclaimer, and messages containing PII are refused up front.
"""

import json
import os
import re
import time

import faiss
import gradio as gr
import numpy as np
import openai
import requests
from dotenv import load_dotenv

# ---------- config ----------
EMBED_MODEL = "text-embedding-3-small"               # OpenAI
GPT_MODEL = "google/gemini-2.5-flash-preview-05-20"  # OpenRouter
SIM_THRESHOLD = 0.30                                 # tweak if recall is poor
TOP_K = 3
DISCLAIMER = "General info only, not a commitment to lend."
# ----------------------------

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# ----- load glossary vectors -----
with open("chunks.json", encoding="utf8") as f:
    CHUNKS = json.load(f)
INDEX = faiss.read_index("glossary.index")

# ----- PII detection (compliance requirement) -----
# Compiled once at module level; these run on every incoming message.
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
_SSN_RE = re.compile(r'\b\d{3}-?\d{2}-?\d{4}\b')
# BUG FIX: the previous pattern made the "credit score" suffix optional
# ((?:\s*credit\s*score)?), so any bare 3-digit number in 400-899 — e.g.
# "Form 450" — was flagged as PII.  The context words are now mandatory,
# matched case-insensitively.
_CREDIT_RE = re.compile(r'\b[4-8]\d{2}\s*credit\s*score\b', re.IGNORECASE)


def contains_pii(text: str) -> bool:
    """Return True if *text* looks like it contains an email address,
    an SSN, or an explicit credit score (e.g. "720 credit score")."""
    return bool(
        _EMAIL_RE.search(text)
        or _SSN_RE.search(text)
        or _CREDIT_RE.search(text)
    )


# ----- conversation memory helpers -----
_FOLLOWUP_RES = [
    re.compile(p) for p in (
        r'\b(elaborate|expand|explain more|tell me more|more details|further|additionally)\b',
        r'\b(can you|could you|would you).*(more|further|elaborate|expand)\b',
        r'\b(what about|how about|what else)\b',
        r'\b(that|this|it)\b.*\?',  # references to the previous topic
        r'^\s*(more|further|additionally|also)\b',
        r'\b(give me more|tell me more|say more)\b',
    )
]


def detect_followup_question(question: str) -> bool:
    """Heuristically detect whether *question* asks for elaboration on a
    previous answer rather than introducing a new topic."""
    question_lower = question.lower()
    return any(rx.search(question_lower) for rx in _FOLLOWUP_RES)


def extract_last_topic(history):
    """Extract the main topic term from the most recent bot response.

    *history* may be a list of Gradio "messages"-style dicts or of
    [user, bot] pairs.  Returns the first capitalized term that matches a
    Non-QM keyword, or None when nothing usable is found.
    """
    if not history:
        return None

    # Get the last bot response in either history format.
    last_exchange = history[-1]
    if isinstance(last_exchange, dict) and 'content' in last_exchange:
        last_response = last_exchange['content']
    elif isinstance(last_exchange, (list, tuple)) and len(last_exchange) >= 2:
        last_response = last_exchange[1]  # bot response
    else:
        return None

    # Strip the trailing disclaimer so it never becomes the "topic".
    if DISCLAIMER in last_response:
        content = last_response.split(DISCLAIMER)[0].strip()
    else:
        content = last_response

    # Look for capitalized terms and common Non-QM keywords.
    terms = re.findall(r'\b[A-Z][A-Za-z-]+(?:\s+[A-Z][A-Za-z-]+)*\b', content)
    nqm_keywords = ['Non-QM', 'DSCR', 'DTI', 'income', 'ratio', 'loan',
                    'mortgage', 'lending']

    # Return the first meaningful term found.
    for term in terms:
        if len(term) > 3 and any(k.lower() in term.lower() for k in nqm_keywords):
            return term
    return None


# ----- helpers -----
def embed(text: str) -> np.ndarray:
    """Call the OpenAI embedding endpoint and return an L2-normalized
    float32 vector (1-D)."""
    res = openai.embeddings.create(model=EMBED_MODEL, input=[text])
    vec = np.array(res.data[0].embedding, dtype="float32").reshape(1, -1)
    faiss.normalize_L2(vec)  # in-place; keeps cosine similarity consistent
    return vec[0]


def _search(query: str):
    """Run one FAISS search and return chunks above the similarity threshold.

    FAISS pads results with id -1 when the index holds fewer than TOP_K
    vectors; the i >= 0 guard prevents accidentally returning CHUNKS[-1].
    """
    vec = embed(query).reshape(1, -1)
    scores, ids = INDEX.search(vec, TOP_K)
    return [
        CHUNKS[i]
        for i, s in zip(ids[0], scores[0])
        if i >= 0 and s >= SIM_THRESHOLD
    ]


def retrieve(question: str, conversation_context: str = None):
    """Return chunks whose cosine similarity >= threshold.

    For follow-up questions, first try the question augmented with
    *conversation_context*; fall back to the bare question if that finds
    nothing.
    """
    if conversation_context and detect_followup_question(question):
        hits = _search(f"{conversation_context} {question}")
        if hits:
            return hits
    return _search(question)


def _build_prompt(question: str, context: str, is_followup: bool):
    """Build the LLM prompt and token budget (shared by the streaming and
    fallback paths so the two can never drift apart)."""
    if is_followup:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "The user is asking for more details about a previous topic.\n"
            "Answer with additional information from the context.\n"
            "Keep it to 3 sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        max_tokens = 150  # allow slightly more room for elaboration
    else:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "Answer the user only with information in the context.\n"
            "Two sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        max_tokens = 120
    return prompt, max_tokens


def call_llm_streaming(question: str, context: str, is_followup: bool = False):
    """Stream the LLM response via OpenRouter SSE, yielding the accumulated
    text after each delta.  Falls back to a non-streaming call on error."""
    prompt, max_tokens = _build_prompt(question, context, is_followup)
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "X-Title": "nonqm-glossary-bot",
    }
    try:
        resp = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json={
                "model": GPT_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": max_tokens,
                "temperature": 0.3,
                "stream": True,
            },
            timeout=60,  # increased timeout for OpenRouter stability
            stream=True,
        )
        resp.raise_for_status()

        accumulated_text = ""
        for raw_line in resp.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode('utf-8')
            if line.startswith('data: '):
                line = line[6:]
            if line.strip() == '[DONE]':
                break
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue  # keep-alive / partial SSE line — skip it
            choices = data.get('choices') or []
            if choices:
                delta = choices[0].get('delta', {})
                if 'content' in delta:
                    accumulated_text += delta['content']
                    yield accumulated_text
                    time.sleep(0.02)  # small delay for smooth streaming
    except Exception:
        # Best-effort degradation: if streaming fails for any reason,
        # retry once with the plain (non-streaming) endpoint.
        yield call_llm_fallback(question, context, is_followup)


def call_llm_fallback(question: str, context: str, is_followup: bool = False) -> str:
    """Non-streaming LLM call used when streaming fails."""
    prompt, max_tokens = _build_prompt(question, context, is_followup)
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "X-Title": "nonqm-glossary-bot",
    }
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        json={
            "model": GPT_MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            "temperature": 0.3,
        },
        timeout=60,  # increased timeout for OpenRouter stability
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"].strip()


# ----- Gradio callback with conversation memory -----
def chat_fn(message, history):
    """Main chat handler: PII gate -> retrieval (with follow-up context)
    -> streamed LLM answer.  Yields partial responses for streaming UIs."""
    # PII detection (compliance requirement).
    if contains_pii(message):
        yield ("I cannot process messages containing personal information. "
               "Please ask about glossary terms only.")
        return

    # Detect if this is a follow-up question and, if so, reuse the last topic
    # to sharpen retrieval.
    is_followup = detect_followup_question(message)
    conversation_context = None
    if is_followup and history:
        conversation_context = extract_last_topic(history)

    hits = retrieve(message, conversation_context)

    # Handle no results.
    if not hits:
        if is_followup:
            yield ("I don't have additional information on that topic in our "
                   "glossary. Please ask a specific question about a Non-QM "
                   "term, or contact a loan officer for more detailed "
                   "assistance.")
        else:
            yield ("I'm not sure about that term. Please contact a loan "
                   "officer for assistance with questions outside our "
                   "glossary.")
        return

    # Stream the response.
    context = "\n---\n".join(hits)
    for partial_response in call_llm_streaming(message, context, is_followup):
        yield partial_response


# ----- Custom theme / CSS for enhanced aesthetics -----
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="slate",
).set(
    body_background_fill="linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
    block_background_fill="*neutral_50",
    button_primary_background_fill="linear-gradient(90deg, #667eea 0%, #764ba2 100%)",
    button_primary_background_fill_hover="linear-gradient(90deg, #5a6fd8 0%, #6a4190 100%)",
)

custom_css = """
.gradio-container {
    max-width: 900px !important;
    margin: auto !important;
    border-radius: 15px !important;
    box-shadow: 0 20px 40px rgba(0,0,0,0.1) !important;
}
.chat-message {
    border-radius: 12px !important;
    margin: 8px 0 !important;
    padding: 12px !important;
}
.message-wrap {
    max-width: 85% !important;
}
.user .message-wrap {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
}
.bot .message-wrap {
    background: #f8f9fa !important;
    border: 1px solid #e9ecef !important;
}
.disclaimer {
    font-style: italic !important;
    color: #6c757d !important;
    border-top: 1px solid #dee2e6 !important;
    margin-top: 8px !important;
    padding-top: 8px !important;
}
/* Typing animation for streaming */
@keyframes typing {
    0% { opacity: 0.4; }
    50% { opacity: 1; }
    100% { opacity: 0.4; }
}
.streaming-text {
    animation: typing 1.5s infinite;
}
"""

# ----- UI -----
with gr.Blocks(theme=custom_theme, css=custom_css,
               title="Non-QM Glossary Assistant") as demo:
    # NOTE(review): the original header markup was lost to formatting damage;
    # the visible text is preserved, the wrapper tags are reconstructed.
    gr.HTML("""
        <div style="text-align: center; padding: 20px;">
            <h1>🏠 Non-QM Glossary Assistant</h1>
            <p>Get instant, accurate definitions of Non-Qualified Mortgage terms</p>
        </div>
    """)

    gr.Markdown("""
### 💬 How to Use This Assistant
- **Ask about Non-QM mortgage terms** and receive clear, accurate definitions
- **Ask follow-up questions** like "tell me more" or "can you elaborate" for additional details
- Questions outside our glossary scope will be directed to a loan officer
- All responses include required compliance disclaimers
- **No personal information** should be shared in your questions

**Example questions:**
- "What is a Non-QM loan?"
- "Define debt-to-income ratio"
- "What does DSCR mean?"
- "Explain asset-based lending"
- "Tell me more about that" (after asking about a term)
""")

    chatbot = gr.ChatInterface(
        fn=chat_fn,
        title="Non-QM Glossary Assistant",
        description=("Ask about Non-QM mortgage terms and get instant "
                     "definitions. Follow-up questions welcome!"),
        type="messages",
    )

    gr.HTML("""
        <div class="disclaimer" style="text-align: center; padding: 15px;">
            <strong>⚠️ IMPORTANT COMPLIANCE NOTICE:</strong><br>
            This assistant provides general information only and is NOT a commitment to lend.
            For personalized advice, loan applications, or specific financial guidance,
            please contact a qualified loan officer.
        </div>
    """)

if __name__ == "__main__":
    demo.launch()