"""Non-QM mortgage glossary chatbot.

Retrieval-augmented Gradio app: user questions are embedded with OpenAI,
matched against a FAISS index of glossary chunks, and answered by an
OpenRouter-hosted LLM.  Every answer ends with a mandatory compliance
disclaimer, and messages containing PII are refused up front.
"""

import json
import os
import re
import time

import faiss
import gradio as gr
import numpy as np
import openai
import requests
from dotenv import load_dotenv

# ---------- config ----------
EMBED_MODEL = "text-embedding-3-small"               # OpenAI
GPT_MODEL = "google/gemini-2.5-flash-preview-05-20"  # OpenRouter
SIM_THRESHOLD = 0.30                                 # tweak if recall is poor
TOP_K = 3
DISCLAIMER = "General info only, not a commitment to lend."
# ----------------------------

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# ----- load glossary vectors -----
with open("chunks.json", encoding="utf8") as f:
    CHUNKS = json.load(f)
INDEX = faiss.read_index("glossary.index")

# ----- PII detection (compliance requirement) -----
# Compiled once at module level; these run on every incoming message.
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
_SSN_RE = re.compile(r'\b\d{3}-?\d{2}-?\d{4}\b')
# BUG FIX: the previous pattern made the "credit score" suffix optional
# ((?:\s*credit\s*score)?), so any bare 3-digit number in 400-899 — e.g.
# "Form 450" — was flagged as PII.  The context words are now mandatory,
# matched case-insensitively.
_CREDIT_RE = re.compile(r'\b[4-8]\d{2}\s*credit\s*score\b', re.IGNORECASE)


def contains_pii(text: str) -> bool:
    """Return True if *text* looks like it contains an email address,
    an SSN, or an explicit credit score (e.g. "720 credit score")."""
    return bool(
        _EMAIL_RE.search(text)
        or _SSN_RE.search(text)
        or _CREDIT_RE.search(text)
    )


# ----- conversation memory helpers -----
_FOLLOWUP_RES = [
    re.compile(p) for p in (
        r'\b(elaborate|expand|explain more|tell me more|more details|further|additionally)\b',
        r'\b(can you|could you|would you).*(more|further|elaborate|expand)\b',
        r'\b(what about|how about|what else)\b',
        r'\b(that|this|it)\b.*\?',  # references to the previous topic
        r'^\s*(more|further|additionally|also)\b',
        r'\b(give me more|tell me more|say more)\b',
    )
]


def detect_followup_question(question: str) -> bool:
    """Heuristically detect whether *question* asks for elaboration on a
    previous answer rather than introducing a new topic."""
    question_lower = question.lower()
    return any(rx.search(question_lower) for rx in _FOLLOWUP_RES)


def extract_last_topic(history):
    """Extract the main topic term from the most recent bot response.

    *history* may be a list of Gradio "messages"-style dicts or of
    [user, bot] pairs.  Returns the first capitalized term that matches a
    Non-QM keyword, or None when nothing usable is found.
    """
    if not history:
        return None

    # Get the last bot response in either history format.
    last_exchange = history[-1]
    if isinstance(last_exchange, dict) and 'content' in last_exchange:
        last_response = last_exchange['content']
    elif isinstance(last_exchange, (list, tuple)) and len(last_exchange) >= 2:
        last_response = last_exchange[1]  # bot response
    else:
        return None

    # Strip the trailing disclaimer so it never becomes the "topic".
    if DISCLAIMER in last_response:
        content = last_response.split(DISCLAIMER)[0].strip()
    else:
        content = last_response

    # Look for capitalized terms and common Non-QM keywords.
    terms = re.findall(r'\b[A-Z][A-Za-z-]+(?:\s+[A-Z][A-Za-z-]+)*\b', content)
    nqm_keywords = ['Non-QM', 'DSCR', 'DTI', 'income', 'ratio', 'loan',
                    'mortgage', 'lending']

    # Return the first meaningful term found.
    for term in terms:
        if len(term) > 3 and any(k.lower() in term.lower() for k in nqm_keywords):
            return term
    return None


# ----- helpers -----
def embed(text: str) -> np.ndarray:
    """Call the OpenAI embedding endpoint and return an L2-normalized
    float32 vector (1-D)."""
    res = openai.embeddings.create(model=EMBED_MODEL, input=[text])
    vec = np.array(res.data[0].embedding, dtype="float32").reshape(1, -1)
    faiss.normalize_L2(vec)  # in-place; keeps cosine similarity consistent
    return vec[0]


def _search(query: str):
    """Run one FAISS search and return chunks above the similarity threshold.

    FAISS pads results with id -1 when the index holds fewer than TOP_K
    vectors; the i >= 0 guard prevents accidentally returning CHUNKS[-1].
    """
    vec = embed(query).reshape(1, -1)
    scores, ids = INDEX.search(vec, TOP_K)
    return [
        CHUNKS[i]
        for i, s in zip(ids[0], scores[0])
        if i >= 0 and s >= SIM_THRESHOLD
    ]


def retrieve(question: str, conversation_context: str = None):
    """Return chunks whose cosine similarity >= threshold.

    For follow-up questions, first try the question augmented with
    *conversation_context*; fall back to the bare question if that finds
    nothing.
    """
    if conversation_context and detect_followup_question(question):
        hits = _search(f"{conversation_context} {question}")
        if hits:
            return hits
    return _search(question)


def _build_prompt(question: str, context: str, is_followup: bool):
    """Build the LLM prompt and token budget (shared by the streaming and
    fallback paths so the two can never drift apart)."""
    if is_followup:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "The user is asking for more details about a previous topic.\n"
            "Answer with additional information from the context.\n"
            "Keep it to 3 sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        max_tokens = 150  # allow slightly more room for elaboration
    else:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "Answer the user only with information in the context.\n"
            "Two sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        max_tokens = 120
    return prompt, max_tokens


def call_llm_streaming(question: str, context: str, is_followup: bool = False):
    """Stream the LLM response via OpenRouter SSE, yielding the accumulated
    text after each delta.  Falls back to a non-streaming call on error."""
    prompt, max_tokens = _build_prompt(question, context, is_followup)
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "X-Title": "nonqm-glossary-bot",
    }
    try:
        resp = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json={
                "model": GPT_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": max_tokens,
                "temperature": 0.3,
                "stream": True,
            },
            timeout=60,  # increased timeout for OpenRouter stability
            stream=True,
        )
        resp.raise_for_status()

        accumulated_text = ""
        for raw_line in resp.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode('utf-8')
            if line.startswith('data: '):
                line = line[6:]
            if line.strip() == '[DONE]':
                break
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue  # keep-alive / partial SSE line — skip it
            choices = data.get('choices') or []
            if choices:
                delta = choices[0].get('delta', {})
                if 'content' in delta:
                    accumulated_text += delta['content']
                    yield accumulated_text
                    time.sleep(0.02)  # small delay for smooth streaming
    except Exception:
        # Best-effort degradation: if streaming fails for any reason,
        # retry once with the plain (non-streaming) endpoint.
        yield call_llm_fallback(question, context, is_followup)


def call_llm_fallback(question: str, context: str, is_followup: bool = False) -> str:
    """Non-streaming LLM call used when streaming fails."""
    prompt, max_tokens = _build_prompt(question, context, is_followup)
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "X-Title": "nonqm-glossary-bot",
    }
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        json={
            "model": GPT_MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            "temperature": 0.3,
        },
        timeout=60,  # increased timeout for OpenRouter stability
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"].strip()


# ----- Gradio callback with conversation memory -----
def chat_fn(message, history):
    """Main chat handler: PII gate -> retrieval (with follow-up context)
    -> streamed LLM answer.  Yields partial responses for streaming UIs."""
    # PII detection (compliance requirement).
    if contains_pii(message):
        yield ("I cannot process messages containing personal information. "
               "Please ask about glossary terms only.")
        return

    # Detect if this is a follow-up question and, if so, reuse the last topic
    # to sharpen retrieval.
    is_followup = detect_followup_question(message)
    conversation_context = None
    if is_followup and history:
        conversation_context = extract_last_topic(history)

    hits = retrieve(message, conversation_context)

    # Handle no results.
    if not hits:
        if is_followup:
            yield ("I don't have additional information on that topic in our "
                   "glossary. Please ask a specific question about a Non-QM "
                   "term, or contact a loan officer for more detailed "
                   "assistance.")
        else:
            yield ("I'm not sure about that term. Please contact a loan "
                   "officer for assistance with questions outside our "
                   "glossary.")
        return

    # Stream the response.
    context = "\n---\n".join(hits)
    for partial_response in call_llm_streaming(message, context, is_followup):
        yield partial_response


# ----- Custom theme / CSS for enhanced aesthetics -----
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="slate",
).set(
    body_background_fill="linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
    block_background_fill="*neutral_50",
    button_primary_background_fill="linear-gradient(90deg, #667eea 0%, #764ba2 100%)",
    button_primary_background_fill_hover="linear-gradient(90deg, #5a6fd8 0%, #6a4190 100%)",
)

custom_css = """
.gradio-container {
    max-width: 900px !important;
    margin: auto !important;
    border-radius: 15px !important;
    box-shadow: 0 20px 40px rgba(0,0,0,0.1) !important;
}
.chat-message {
    border-radius: 12px !important;
    margin: 8px 0 !important;
    padding: 12px !important;
}
.message-wrap {
    max-width: 85% !important;
}
.user .message-wrap {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
}
.bot .message-wrap {
    background: #f8f9fa !important;
    border: 1px solid #e9ecef !important;
}
.disclaimer {
    font-style: italic !important;
    color: #6c757d !important;
    border-top: 1px solid #dee2e6 !important;
    margin-top: 8px !important;
    padding-top: 8px !important;
}
/* Typing animation for streaming */
@keyframes typing {
    0% { opacity: 0.4; }
    50% { opacity: 1; }
    100% { opacity: 0.4; }
}
.streaming-text {
    animation: typing 1.5s infinite;
}
"""

# ----- UI -----
with gr.Blocks(theme=custom_theme, css=custom_css,
               title="Non-QM Glossary Assistant") as demo:
    # NOTE(review): the original header markup was lost to formatting damage;
    # the visible text is preserved, the wrapper tags are reconstructed.
    gr.HTML("""
        <div style="text-align: center; padding: 20px;">
            <h1>🏠 Non-QM Glossary Assistant</h1>
            <p>Get instant, accurate definitions of Non-Qualified Mortgage terms</p>
        </div>
    """)

    gr.Markdown("""
### 💬 How to Use This Assistant
- **Ask about Non-QM mortgage terms** and receive clear, accurate definitions
- **Ask follow-up questions** like "tell me more" or "can you elaborate" for additional details
- Questions outside our glossary scope will be directed to a loan officer
- All responses include required compliance disclaimers
- **No personal information** should be shared in your questions

**Example questions:**
- "What is a Non-QM loan?"
- "Define debt-to-income ratio"
- "What does DSCR mean?"
- "Explain asset-based lending"
- "Tell me more about that" (after asking about a term)
""")

    chatbot = gr.ChatInterface(
        fn=chat_fn,
        title="Non-QM Glossary Assistant",
        description=("Ask about Non-QM mortgage terms and get instant "
                     "definitions. Follow-up questions welcome!"),
        type="messages",
    )

    gr.HTML("""
        <div class="disclaimer" style="text-align: center; padding: 15px;">
            <strong>⚠️ IMPORTANT COMPLIANCE NOTICE:</strong><br>
            This assistant provides general information only and is NOT a commitment to lend.
            For personalized advice, loan applications, or specific financial guidance,
            please contact a qualified loan officer.
        </div>
    """)

if __name__ == "__main__":
    demo.launch()