# digitChatBot / app.py
# (Hugging Face Space upload: paradox44, "Upload 7 files", commit bd7261b verified)
import os
import json
import faiss
import numpy as np
import requests
import gradio as gr
from dotenv import load_dotenv
import openai
import re
import time
# ---------- config ----------
EMBED_MODEL = "text-embedding-3-small"  # OpenAI embedding model (query + glossary vectors must match)
GPT_MODEL = "google/gemini-2.5-flash-preview-05-20"  # chat model, served via OpenRouter
SIM_THRESHOLD = 0.30  # minimum cosine similarity for a chunk to count as a hit; tweak if recall is poor
TOP_K = 3  # nearest chunks fetched per FAISS search
DISCLAIMER = "General info only, not a commitment to lend."  # compliance line the LLM must append
# ----------------------------

# Pull API keys from a local .env file (or the process environment).
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# ----- load glossary vectors -----
# chunks.json holds the glossary text chunks; glossary.index is the matching
# FAISS index — row i of the index is assumed to correspond to CHUNKS[i].
with open("chunks.json", encoding="utf8") as f:
    CHUNKS = json.load(f)
INDEX = faiss.read_index("glossary.index")
# ----- PII detection (compliance requirement) -----
def contains_pii(text: str) -> bool:
    """Return True if *text* appears to contain PII (email, SSN, or credit score).

    Compliance requirement: messages containing personal information are
    rejected before any retrieval or LLM call is made.
    """
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
    ssn_pattern = r'\b\d{3}-?\d{2}-?\d{4}\b'
    # A 3-digit number (300-899) only counts as a credit score when the phrase
    # "credit score" appears near it, in either order.  The previous pattern
    # made the phrase optional, so ANY bare 3-digit number 400-899 (e.g.
    # "450 dollars") was incorrectly flagged as PII.
    credit_pattern = (
        r'\b[3-8]\d{2}\b[^.\n]{0,20}?\bcredit\s*score\b'
        r'|\bcredit\s*score\b[^.\n]{0,20}?\b[3-8]\d{2}\b'
    )
    return bool(re.search(email_pattern, text) or
                re.search(ssn_pattern, text) or
                re.search(credit_pattern, text, re.IGNORECASE))
# ----- conversation memory helpers -----
def detect_followup_question(question: str) -> bool:
    """Heuristically decide whether *question* asks to elaborate on the previous topic."""
    cues = (
        r'\b(elaborate|expand|explain more|tell me more|more details|further|additionally)\b',
        r'\b(can you|could you|would you).*(more|further|elaborate|expand)\b',
        r'\b(what about|how about|what else)\b',
        r'\b(that|this|it)\b.*\?',  # pronoun reference back to the previous answer
        r'^\s*(more|further|additionally|also)\b',
        r'\b(give me more|tell me more|say more)\b',
    )
    lowered = question.lower()
    for cue in cues:
        if re.search(cue, lowered):
            return True
    return False
def extract_last_topic(history):
    """Pull a key Non-QM term out of the most recent bot reply, or None.

    Supports both Gradio history shapes: a list of {"role", "content"} dicts
    and a list of [user, bot] pairs.
    """
    if not history:
        return None
    tail = history[-1]
    if isinstance(tail, dict) and 'content' in tail:
        reply = tail['content']
    elif isinstance(tail, list) and len(tail) >= 2:
        reply = tail[1]  # [user, bot] pair — take the bot half
    else:
        return None
    # Strip the trailing disclaimer so it never pollutes term extraction.
    body = reply.split(DISCLAIMER)[0].strip() if DISCLAIMER in reply else reply
    nqm_keywords = ['Non-QM', 'DSCR', 'DTI', 'income', 'ratio', 'loan', 'mortgage', 'lending']
    lowered_keywords = [k.lower() for k in nqm_keywords]
    # Runs of capitalised words are the candidate glossary terms; return the
    # first one that is long enough and overlaps a known Non-QM keyword.
    for candidate in re.findall(r'\b[A-Z][A-Za-z-]+(?:\s+[A-Z][A-Za-z-]+)*\b', body):
        if len(candidate) > 3 and any(k in candidate.lower() for k in lowered_keywords):
            return candidate
    return None
# ----- helpers -----
def embed(text: str) -> np.ndarray:
    """Embed *text* with the OpenAI API and return a unit-length float32 vector."""
    response = openai.embeddings.create(model=EMBED_MODEL, input=[text])
    vector = np.array(response.data[0].embedding, dtype="float32")
    # normalize_L2 mutates in place through the contiguous 2-D view, so
    # `vector` itself comes out unit-length — matching the index's metric.
    faiss.normalize_L2(vector.reshape(1, -1))
    return vector
def retrieve(question: str, conversation_context: str = None):
    """Return glossary chunks whose cosine similarity >= SIM_THRESHOLD.

    For follow-up questions, *conversation_context* (the previous topic) is
    prepended to the query to improve recall; if that augmented search finds
    nothing, the bare question is retried.

    Fixes: the search (embed -> INDEX.search -> threshold filter) was
    copy-pasted twice; it is now a single helper.  The fallback is also
    skipped when it would re-run the identical query, saving a redundant
    embedding API call.
    """
    def _search(query: str):
        # Embed the query and keep only TOP_K hits above the threshold.
        vec = embed(query).reshape(1, -1)
        scores, ids = INDEX.search(vec, TOP_K)
        return [CHUNKS[i] for i, s in zip(ids[0], scores[0]) if s >= SIM_THRESHOLD]

    search_query = question
    if conversation_context and detect_followup_question(question):
        search_query = f"{conversation_context} {question}"
    hits = _search(search_query)
    # Context-augmented queries can over-constrain retrieval; fall back to
    # the plain question (only when it differs from what we already tried).
    if not hits and conversation_context and search_query != question:
        hits = _search(question)
    return hits
def call_llm_streaming(question: str, context: str, is_followup: bool = False):
    """Stream an LLM answer from OpenRouter, yielding the accumulated text.

    Yields progressively longer strings (the full answer so far) so the Gradio
    chat renders a typing effect.  On any failure during streaming, yields one
    complete answer from the non-streaming fallback instead.
    """
    # Adjust prompt for follow-up questions: elaboration gets one extra
    # sentence and a slightly larger token budget.
    if is_followup:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "The user is asking for more details about a previous topic.\n"
            "Answer with additional information from the context.\n"
            "Keep it to 3 sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        max_tokens = 150  # Allow slightly more for elaboration
    else:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "Answer the user only with information in the context.\n"
            "Two sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        max_tokens = 120
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "X-Title": "nonqm-glossary-bot"
    }
    try:
        resp = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json={
                "model": GPT_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": max_tokens,
                "temperature": 0.3,
                "stream": True
            },
            timeout=60,  # Increased timeout for OpenRouter stability
            stream=True
        )
        resp.raise_for_status()
        accumulated_text = ""
        # OpenRouter streams Server-Sent Events: each payload line looks like
        # "data: {json}", and the stream terminates with "data: [DONE]".
        for line in resp.iter_lines():
            if line:
                line = line.decode('utf-8')
                if line.startswith('data: '):
                    line = line[6:]
                    if line.strip() == '[DONE]':
                        break
                    try:
                        data = json.loads(line)
                        if 'choices' in data and len(data['choices']) > 0:
                            delta = data['choices'][0].get('delta', {})
                            if 'content' in delta:
                                content = delta['content']
                                accumulated_text += content
                                # Yield the whole text so far, not just the delta.
                                yield accumulated_text
                                time.sleep(0.02)  # Small delay for smooth streaming
                    except json.JSONDecodeError:
                        # Keep-alive/comment lines are not JSON — skip them.
                        continue
    except Exception as e:
        # Fallback to non-streaming if streaming fails for any reason
        # (connection error, HTTP error, malformed stream).
        yield call_llm_fallback(question, context, is_followup)
def call_llm_fallback(question: str, context: str, is_followup: bool = False) -> str:
    """Single-shot (non-streaming) OpenRouter call; returns the complete answer."""
    # Follow-ups get an elaboration-oriented instruction block and a slightly
    # larger token budget; the prompt text matches the streaming path exactly.
    if is_followup:
        instructions = (
            "You are a Non-QM glossary assistant.\n"
            "The user is asking for more details about a previous topic.\n"
            "Answer with additional information from the context.\n"
            "Keep it to 3 sentences max. Finish with this exact line:\n"
        )
        token_budget = 150
    else:
        instructions = (
            "You are a Non-QM glossary assistant.\n"
            "Answer the user only with information in the context.\n"
            "Two sentences max. Finish with this exact line:\n"
        )
        token_budget = 120
    prompt = f"{instructions}{DISCLAIMER}\n\nUser: {question}\nContext:\n{context}"
    payload = {
        "model": GPT_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": token_budget,
        "temperature": 0.3
    }
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "X-Title": "nonqm-glossary-bot"
        },
        json=payload,
        timeout=60  # generous timeout for OpenRouter stability
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"].strip()
# ----- Enhanced Gradio callback with conversation memory -----
def chat_fn(message, history):
    """Gradio chat callback: PII gate, context-aware retrieval, streamed answer."""
    # Compliance gate: refuse anything that looks like personal information.
    if contains_pii(message):
        yield "I cannot process messages containing personal information. Please ask about glossary terms only."
        return
    is_followup = detect_followup_question(message)
    # For follow-ups, carry the previous topic into retrieval so queries like
    # "tell me more" still land on the right glossary entries.
    conversation_context = None
    if is_followup and history:
        conversation_context = extract_last_topic(history)
    hits = retrieve(message, conversation_context)
    if not hits:
        if is_followup:
            yield "I don't have additional information on that topic in our glossary. Please ask a specific question about a Non-QM term, or contact a loan officer for more detailed assistance."
        else:
            yield "I'm not sure about that term. Please contact a loan officer for assistance with questions outside our glossary."
        return
    # Stream the LLM answer, re-yielding each accumulated partial.
    context = "\n---\n".join(hits)
    yield from call_llm_streaming(message, context, is_followup)
# ----- Custom CSS for enhanced aesthetics -----
# Soft Gradio theme tinted blue/purple to match the gradient branding used
# in the header and footer HTML below.
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="slate",
).set(
    # Page background and primary buttons share the same purple gradient.
    body_background_fill="linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
    block_background_fill="*neutral_50",
    button_primary_background_fill="linear-gradient(90deg, #667eea 0%, #764ba2 100%)",
    button_primary_background_fill_hover="linear-gradient(90deg, #5a6fd8 0%, #6a4190 100%)",
)
custom_css = """
.gradio-container {
max-width: 900px !important;
margin: auto !important;
border-radius: 15px !important;
box-shadow: 0 20px 40px rgba(0,0,0,0.1) !important;
}
.chat-message {
border-radius: 12px !important;
margin: 8px 0 !important;
padding: 12px !important;
}
.message-wrap {
max-width: 85% !important;
}
.user .message-wrap {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
color: white !important;
}
.bot .message-wrap {
background: #f8f9fa !important;
border: 1px solid #e9ecef !important;
}
.disclaimer {
font-style: italic !important;
color: #6c757d !important;
border-top: 1px solid #dee2e6 !important;
margin-top: 8px !important;
padding-top: 8px !important;
}
/* Typing animation for streaming */
@keyframes typing {
0% { opacity: 0.4; }
50% { opacity: 1; }
100% { opacity: 0.4; }
}
.streaming-text {
animation: typing 1.5s infinite;
}
"""
# ----- Enhanced UI -----
# Page layout: header banner, usage instructions, chat widget, compliance footer.
with gr.Blocks(theme=custom_theme, css=custom_css, title="Non-QM Glossary Assistant") as demo:
    # Branded header banner.
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 12px; margin-bottom: 20px;">
    <h1 style="margin: 0; font-size: 2.5em; font-weight: 700;">🏠 Non-QM Glossary Assistant</h1>
    <p style="margin: 10px 0 0 0; font-size: 1.2em; opacity: 0.95;">
    Get instant, accurate definitions of Non-Qualified Mortgage terms
    </p>
    </div>
    """)
    # Usage instructions and example questions.
    gr.Markdown("""
    ### 💬 How to Use This Assistant
    - **Ask about Non-QM mortgage terms** and receive clear, accurate definitions
    - **Ask follow-up questions** like "tell me more" or "can you elaborate" for additional details
    - Questions outside our glossary scope will be directed to a loan officer
    - All responses include required compliance disclaimers
    - **No personal information** should be shared in your questions
    **Example questions:**
    - "What is a Non-QM loan?"
    - "Define debt-to-income ratio"
    - "What does DSCR mean?"
    - "Explain asset-based lending"
    - "Tell me more about that" (after asking about a term)
    """)
    # Chat widget wired to chat_fn; type="messages" passes history as
    # [{"role": ..., "content": ...}] dicts (the shape extract_last_topic handles).
    chatbot = gr.ChatInterface(
        fn=chat_fn,
        title="Non-QM Glossary Assistant",
        description="Ask about Non-QM mortgage terms and get instant definitions. Follow-up questions welcome!",
        type="messages"
    )
    # Mandatory compliance notice shown beneath the chat.
    gr.HTML("""
    <div style="text-align: center; margin-top: 20px; padding: 20px; background: #dc3545; border: 2px solid #b02a37; border-radius: 12px; box-shadow: 0 4px 12px rgba(220, 53, 69, 0.3);">
    <p style="margin: 0; color: white; font-size: 1.1em; font-weight: 600; line-height: 1.4;">
    <strong>⚠️ IMPORTANT COMPLIANCE NOTICE:</strong><br><br>
    This assistant provides general information only and is NOT a commitment to lend.<br>
    For personalized advice, loan applications, or specific financial guidance,<br>
    please contact a qualified loan officer.
    </p>
    </div>
    """)
if __name__ == "__main__":
    # Launch the Gradio server (blocking call).
    demo.launch()