# Hugging Face Space: Non-QM Glossary Assistant
# (page-status banner from the original Space listing removed during extraction)
import os
import json
import faiss
import numpy as np
import requests
import gradio as gr
from dotenv import load_dotenv
import openai
import re
import time

# ---------- config ----------
EMBED_MODEL = "text-embedding-3-small"  # OpenAI embedding model used for retrieval
GPT_MODEL = "google/gemini-2.5-flash-preview-05-20"  # OpenRouter chat model for answers
SIM_THRESHOLD = 0.30  # minimum cosine similarity for a chunk to count; tweak if recall is poor
TOP_K = 3  # number of nearest chunks fetched per query
DISCLAIMER = "General info only, not a commitment to lend."  # appended to every answer (compliance)
# ----------------------------

# Secrets are read from .env / the process environment.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# ----- load glossary vectors -----
# chunks.json holds the glossary text chunks; glossary.index is the matching
# FAISS index built offline — positions in CHUNKS align with index ids.
with open("chunks.json", encoding="utf8") as f:
    CHUNKS = json.load(f)
INDEX = faiss.read_index("glossary.index")
| # ----- PII detection (compliance requirement) ----- | |
def contains_pii(text: str) -> bool:
    """Return True if *text* appears to contain PII (email, SSN, or credit score).

    Compliance gate: messages flagged here are rejected before retrieval.

    Args:
        text: Raw user message.

    Returns:
        True when any PII pattern matches, else False.
    """
    # Note: original class was [A-Z|a-z], which included a literal '|'.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    ssn_pattern = r'\b\d{3}-?\d{2}-?\d{4}\b'
    # A number in the 400-899 range only counts as a credit score when the
    # words "credit score" appear right before or after it. The previous
    # pattern made that context optional, so ANY bare 3-digit number
    # (e.g. "750 dollars", "Form 4506-C") was misflagged as PII.
    credit_pattern = (
        r'\b(?:credit\s*score\s*(?:is|of|was|:)?\s*[4-8]\d{2}\b'
        r'|[4-8]\d{2}\s*credit\s*score\b)'
    )
    return bool(
        re.search(email_pattern, text)
        or re.search(ssn_pattern, text)
        or re.search(credit_pattern, text, re.IGNORECASE)
    )
| # ----- conversation memory helpers ----- | |
def detect_followup_question(question: str) -> bool:
    """Return True when *question* looks like a request to elaborate on the previous answer."""
    lowered = question.lower()
    followup_patterns = (
        r'\b(elaborate|expand|explain more|tell me more|more details|further|additionally)\b',
        r'\b(can you|could you|would you).*(more|further|elaborate|expand)\b',
        r'\b(what about|how about|what else)\b',
        r'\b(that|this|it)\b.*\?',  # pronoun referring back to the previous topic
        r'^\s*(more|further|additionally|also)\b',
        r'\b(give me more|tell me more|say more)\b',
    )
    for pattern in followup_patterns:
        if re.search(pattern, lowered):
            return True
    return False
def extract_last_topic(history):
    """Pull a likely Non-QM topic term out of the most recent bot reply, or None."""
    if not history:
        return None
    # Accept both Gradio history shapes: {"role", "content"} dicts and
    # [user, bot] pairs.
    last = history[-1]
    if isinstance(last, dict) and 'content' in last:
        reply = last['content']
    elif isinstance(last, list) and len(last) >= 2:
        reply = last[1]  # second element is the bot turn
    else:
        return None
    # Strip the trailing compliance disclaimer before mining for terms.
    body = reply.split(DISCLAIMER)[0].strip() if DISCLAIMER in reply else reply
    nqm_keywords = ['Non-QM', 'DSCR', 'DTI', 'income', 'ratio', 'loan', 'mortgage', 'lending']
    # Candidate topics are runs of capitalized words; keep the first one that
    # mentions a known Non-QM keyword.
    for candidate in re.findall(r'\b[A-Z][A-Za-z-]+(?:\s+[A-Z][A-Za-z-]+)*\b', body):
        if len(candidate) > 3 and any(kw.lower() in candidate.lower() for kw in nqm_keywords):
            return candidate
    return None
| # ----- helpers ----- | |
def embed(text: str) -> np.ndarray:
    """Embed *text* with OpenAI and return an L2-normalized float32 vector."""
    response = openai.embeddings.create(model=EMBED_MODEL, input=[text])
    vector = np.array(response.data[0].embedding, dtype="float32")
    # normalize_L2 works in place through the temporary 2-D view, so the
    # returned 1-D vector is already unit length.
    faiss.normalize_L2(vector.reshape(1, -1))
    return vector
def retrieve(question: str, conversation_context: str = None):
    """Return chunks whose cosine sim >= threshold, with optional conversation context."""

    def _search(query: str):
        # One FAISS lookup, filtered by the similarity threshold.
        qvec = embed(query).reshape(1, -1)
        sims, idxs = INDEX.search(qvec, TOP_K)
        return [CHUNKS[idx] for idx, sim in zip(idxs[0], sims[0]) if sim >= SIM_THRESHOLD]

    # Prepend the previous topic for follow-up questions so retrieval
    # understands what "it"/"that" refers to.
    query = question
    if conversation_context and detect_followup_question(question):
        query = f"{conversation_context} {question}"
    hits = _search(query)
    # If the context-augmented query found nothing, retry with the bare question.
    if not hits and conversation_context:
        hits = _search(question)
    return hits
def call_llm_streaming(question: str, context: str, is_followup: bool = False):
    """Stream the LLM answer as progressively longer strings; fall back to a
    single non-streaming reply on any error."""
    # Follow-ups get a slightly longer, elaboration-oriented prompt.
    if is_followup:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "The user is asking for more details about a previous topic.\n"
            "Answer with additional information from the context.\n"
            "Keep it to 3 sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        token_budget = 150  # allow slightly more for elaboration
    else:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "Answer the user only with information in the context.\n"
            "Two sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        token_budget = 120
    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "X-Title": "nonqm-glossary-bot",
            },
            json={
                "model": GPT_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": token_budget,
                "temperature": 0.3,
                "stream": True,
            },
            timeout=60,  # generous timeout for OpenRouter stability
            stream=True,
        )
        response.raise_for_status()
        so_far = ""
        # Parse the SSE stream: each payload line is "data: {json}" and the
        # stream ends with "data: [DONE]".
        for raw in response.iter_lines():
            if not raw:
                continue
            decoded = raw.decode('utf-8')
            if not decoded.startswith('data: '):
                continue
            payload = decoded[6:]
            if payload.strip() == '[DONE]':
                break
            try:
                event = json.loads(payload)
            except json.JSONDecodeError:
                continue  # skip malformed keep-alive fragments
            choices = event.get('choices')
            if not choices:
                continue
            delta = choices[0].get('delta', {})
            if 'content' not in delta:
                continue
            so_far += delta['content']
            yield so_far  # emit the accumulated text, not just the delta
            time.sleep(0.02)  # small delay for smooth streaming in the UI
    except Exception:
        # Any failure (connect, HTTP, mid-stream) degrades to one-shot mode.
        yield call_llm_fallback(question, context, is_followup)
def call_llm_fallback(question: str, context: str, is_followup: bool = False) -> str:
    """Single-shot (non-streaming) OpenRouter call, used when streaming fails."""
    # Same prompts as the streaming path so both modes answer identically.
    if is_followup:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "The user is asking for more details about a previous topic.\n"
            "Answer with additional information from the context.\n"
            "Keep it to 3 sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        token_budget = 150
    else:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "Answer the user only with information in the context.\n"
            "Two sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        token_budget = 120
    response = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "X-Title": "nonqm-glossary-bot",
        },
        json={
            "model": GPT_MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": token_budget,
            "temperature": 0.3,
        },
        timeout=60,  # generous timeout for OpenRouter stability
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"].strip()
| # ----- Enhanced Gradio callback with conversation memory ----- | |
def chat_fn(message, history):
    """Gradio chat callback: PII gate -> context-aware retrieval -> streamed answer.

    Yields progressively longer response strings so the UI can stream.
    """
    # Compliance requirement: refuse anything that looks like PII.
    if contains_pii(message):
        yield "I cannot process messages containing personal information. Please ask about glossary terms only."
        return
    # For follow-ups, mine the previous bot turn for a topic to anchor retrieval.
    is_followup = detect_followup_question(message)
    topic = extract_last_topic(history) if (is_followup and history) else None
    hits = retrieve(message, topic) if topic else retrieve(message)
    # Out-of-glossary questions get a canned redirect instead of an LLM call.
    if not hits:
        if is_followup:
            yield "I don't have additional information on that topic in our glossary. Please ask a specific question about a Non-QM term, or contact a loan officer for more detailed assistance."
        else:
            yield "I'm not sure about that term. Please contact a loan officer for assistance with questions outside our glossary."
        return
    # Join the retrieved chunks and stream the grounded answer.
    context = "\n---\n".join(hits)
    yield from call_llm_streaming(message, context, is_followup)
| # ----- Custom CSS for enhanced aesthetics ----- | |
# ----- Custom theme: blue/purple gradient look built on Gradio's Soft theme -----
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="slate",
).set(
    body_background_fill="linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
    block_background_fill="*neutral_50",
    button_primary_background_fill="linear-gradient(90deg, #667eea 0%, #764ba2 100%)",
    button_primary_background_fill_hover="linear-gradient(90deg, #5a6fd8 0%, #6a4190 100%)",
)

# Extra CSS layered on top of the theme: card-style container, chat bubble
# colors, disclaimer styling, and a pulsing animation used while streaming.
custom_css = """
.gradio-container {
max-width: 900px !important;
margin: auto !important;
border-radius: 15px !important;
box-shadow: 0 20px 40px rgba(0,0,0,0.1) !important;
}
.chat-message {
border-radius: 12px !important;
margin: 8px 0 !important;
padding: 12px !important;
}
.message-wrap {
max-width: 85% !important;
}
.user .message-wrap {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
color: white !important;
}
.bot .message-wrap {
background: #f8f9fa !important;
border: 1px solid #e9ecef !important;
}
.disclaimer {
font-style: italic !important;
color: #6c757d !important;
border-top: 1px solid #dee2e6 !important;
margin-top: 8px !important;
padding-top: 8px !important;
}
/* Typing animation for streaming */
@keyframes typing {
0% { opacity: 0.4; }
50% { opacity: 1; }
100% { opacity: 0.4; }
}
.streaming-text {
animation: typing 1.5s infinite;
}
"""
| # ----- Enhanced UI ----- | |
# ----- Enhanced UI: header banner, usage guide, chat widget, compliance notice -----
with gr.Blocks(theme=custom_theme, css=custom_css, title="Non-QM Glossary Assistant") as demo:
    # Gradient hero header.
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 12px; margin-bottom: 20px;">
    <h1 style="margin: 0; font-size: 2.5em; font-weight: 700;">🏠 Non-QM Glossary Assistant</h1>
    <p style="margin: 10px 0 0 0; font-size: 1.2em; opacity: 0.95;">
    Get instant, accurate definitions of Non-Qualified Mortgage terms
    </p>
    </div>
    """)
    # Usage instructions and example prompts.
    gr.Markdown("""
    ### 💬 How to Use This Assistant
    - **Ask about Non-QM mortgage terms** and receive clear, accurate definitions
    - **Ask follow-up questions** like "tell me more" or "can you elaborate" for additional details
    - Questions outside our glossary scope will be directed to a loan officer
    - All responses include required compliance disclaimers
    - **No personal information** should be shared in your questions
    **Example questions:**
    - "What is a Non-QM loan?"
    - "Define debt-to-income ratio"
    - "What does DSCR mean?"
    - "Explain asset-based lending"
    - "Tell me more about that" (after asking about a term)
    """)
    # type="messages" makes history a list of {"role", "content"} dicts,
    # which chat_fn / extract_last_topic expect.
    chatbot = gr.ChatInterface(
        fn=chat_fn,
        title="Non-QM Glossary Assistant",
        description="Ask about Non-QM mortgage terms and get instant definitions. Follow-up questions welcome!",
        type="messages"
    )
    # Mandatory compliance footer.
    gr.HTML("""
    <div style="text-align: center; margin-top: 20px; padding: 20px; background: #dc3545; border: 2px solid #b02a37; border-radius: 12px; box-shadow: 0 4px 12px rgba(220, 53, 69, 0.3);">
    <p style="margin: 0; color: white; font-size: 1.1em; font-weight: 600; line-height: 1.4;">
    <strong>⚠️ IMPORTANT COMPLIANCE NOTICE:</strong><br><br>
    This assistant provides general information only and is NOT a commitment to lend.<br>
    For personalized advice, loan applications, or specific financial guidance,<br>
    please contact a qualified loan officer.
    </p>
    </div>
    """)

if __name__ == "__main__":
    demo.launch()