# digitChatBot / app.py
# (Hugging Face Space upload: paradox44, "Upload 7 files", commit bd7261b verified)
import os
import json
import faiss
import numpy as np
import requests
import gradio as gr
from dotenv import load_dotenv
import openai
import re
import time
# ---------- config ----------
EMBED_MODEL = "text-embedding-3-small"  # OpenAI embedding model (query + glossary vectors must match)
GPT_MODEL = "google/gemini-2.5-flash-preview-05-20"  # chat model, served via OpenRouter
SIM_THRESHOLD = 0.30  # minimum cosine similarity for a chunk to count as a hit; tweak if recall is poor
TOP_K = 3  # nearest chunks fetched per FAISS search
DISCLAIMER = "General info only, not a commitment to lend."  # compliance line the LLM must append
# ----------------------------

# Pull API keys from a local .env file (or the process environment).
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# ----- load glossary vectors -----
# chunks.json holds the glossary text chunks; glossary.index is the matching
# FAISS index — row i of the index is assumed to correspond to CHUNKS[i].
with open("chunks.json", encoding="utf8") as f:
    CHUNKS = json.load(f)
INDEX = faiss.read_index("glossary.index")
# ----- PII detection (compliance requirement) -----
def contains_pii(text: str) -> bool:
    """Return True if *text* appears to contain PII (email, SSN, or credit score).

    Compliance requirement: messages containing personal information are
    rejected before any retrieval or LLM call is made.
    """
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
    ssn_pattern = r'\b\d{3}-?\d{2}-?\d{4}\b'
    # A 3-digit number (300-899) only counts as a credit score when the phrase
    # "credit score" appears near it, in either order.  The previous pattern
    # made the phrase optional, so ANY bare 3-digit number 400-899 (e.g.
    # "450 dollars") was incorrectly flagged as PII.
    credit_pattern = (
        r'\b[3-8]\d{2}\b[^.\n]{0,20}?\bcredit\s*score\b'
        r'|\bcredit\s*score\b[^.\n]{0,20}?\b[3-8]\d{2}\b'
    )
    return bool(re.search(email_pattern, text) or
                re.search(ssn_pattern, text) or
                re.search(credit_pattern, text, re.IGNORECASE))
# ----- conversation memory helpers -----
def detect_followup_question(question: str) -> bool:
    """Heuristically decide whether *question* asks to elaborate on the previous topic."""
    cues = (
        r'\b(elaborate|expand|explain more|tell me more|more details|further|additionally)\b',
        r'\b(can you|could you|would you).*(more|further|elaborate|expand)\b',
        r'\b(what about|how about|what else)\b',
        r'\b(that|this|it)\b.*\?',  # pronoun reference back to the previous answer
        r'^\s*(more|further|additionally|also)\b',
        r'\b(give me more|tell me more|say more)\b',
    )
    lowered = question.lower()
    for cue in cues:
        if re.search(cue, lowered):
            return True
    return False
def extract_last_topic(history):
    """Pull a key Non-QM term out of the most recent bot reply, or None.

    Supports both Gradio history shapes: a list of {"role", "content"} dicts
    and a list of [user, bot] pairs.
    """
    if not history:
        return None
    tail = history[-1]
    if isinstance(tail, dict) and 'content' in tail:
        reply = tail['content']
    elif isinstance(tail, list) and len(tail) >= 2:
        reply = tail[1]  # [user, bot] pair — take the bot half
    else:
        return None
    # Strip the trailing disclaimer so it never pollutes term extraction.
    body = reply.split(DISCLAIMER)[0].strip() if DISCLAIMER in reply else reply
    nqm_keywords = ['Non-QM', 'DSCR', 'DTI', 'income', 'ratio', 'loan', 'mortgage', 'lending']
    lowered_keywords = [k.lower() for k in nqm_keywords]
    # Runs of capitalised words are the candidate glossary terms; return the
    # first one that is long enough and overlaps a known Non-QM keyword.
    for candidate in re.findall(r'\b[A-Z][A-Za-z-]+(?:\s+[A-Z][A-Za-z-]+)*\b', body):
        if len(candidate) > 3 and any(k in candidate.lower() for k in lowered_keywords):
            return candidate
    return None
# ----- helpers -----
def embed(text: str) -> np.ndarray:
    """Embed *text* with the OpenAI API and return a unit-length float32 vector."""
    response = openai.embeddings.create(model=EMBED_MODEL, input=[text])
    vector = np.array(response.data[0].embedding, dtype="float32")
    # normalize_L2 mutates in place through the contiguous 2-D view, so
    # `vector` itself comes out unit-length — matching the index's metric.
    faiss.normalize_L2(vector.reshape(1, -1))
    return vector
def retrieve(question: str, conversation_context: str = None):
    """Return glossary chunks whose cosine similarity >= SIM_THRESHOLD.

    For follow-up questions, *conversation_context* (the previous topic) is
    prepended to the query to improve recall; if that augmented search finds
    nothing, the bare question is retried.

    Fixes: the search (embed -> INDEX.search -> threshold filter) was
    copy-pasted twice; it is now a single helper.  The fallback is also
    skipped when it would re-run the identical query, saving a redundant
    embedding API call.
    """
    def _search(query: str):
        # Embed the query and keep only TOP_K hits above the threshold.
        vec = embed(query).reshape(1, -1)
        scores, ids = INDEX.search(vec, TOP_K)
        return [CHUNKS[i] for i, s in zip(ids[0], scores[0]) if s >= SIM_THRESHOLD]

    search_query = question
    if conversation_context and detect_followup_question(question):
        search_query = f"{conversation_context} {question}"
    hits = _search(search_query)
    # Context-augmented queries can over-constrain retrieval; fall back to
    # the plain question (only when it differs from what we already tried).
    if not hits and conversation_context and search_query != question:
        hits = _search(question)
    return hits
def call_llm_streaming(question: str, context: str, is_followup: bool = False):
    """Stream an LLM answer from OpenRouter, yielding the accumulated text.

    Yields progressively longer strings (the full answer so far) so the Gradio
    chat renders a typing effect.  On any failure during streaming, yields one
    complete answer from the non-streaming fallback instead.
    """
    # Adjust prompt for follow-up questions: elaboration gets one extra
    # sentence and a slightly larger token budget.
    if is_followup:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "The user is asking for more details about a previous topic.\n"
            "Answer with additional information from the context.\n"
            "Keep it to 3 sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        max_tokens = 150  # Allow slightly more for elaboration
    else:
        prompt = (
            "You are a Non-QM glossary assistant.\n"
            "Answer the user only with information in the context.\n"
            "Two sentences max. Finish with this exact line:\n"
            f"{DISCLAIMER}\n\n"
            f"User: {question}\n"
            f"Context:\n{context}"
        )
        max_tokens = 120
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "X-Title": "nonqm-glossary-bot"
    }
    try:
        resp = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json={
                "model": GPT_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": max_tokens,
                "temperature": 0.3,
                "stream": True
            },
            timeout=60,  # Increased timeout for OpenRouter stability
            stream=True
        )
        resp.raise_for_status()
        accumulated_text = ""
        # OpenRouter streams Server-Sent Events: each payload line looks like
        # "data: {json}", and the stream terminates with "data: [DONE]".
        for line in resp.iter_lines():
            if line:
                line = line.decode('utf-8')
                if line.startswith('data: '):
                    line = line[6:]
                    if line.strip() == '[DONE]':
                        break
                    try:
                        data = json.loads(line)
                        if 'choices' in data and len(data['choices']) > 0:
                            delta = data['choices'][0].get('delta', {})
                            if 'content' in delta:
                                content = delta['content']
                                accumulated_text += content
                                # Yield the whole text so far, not just the delta.
                                yield accumulated_text
                                time.sleep(0.02)  # Small delay for smooth streaming
                    except json.JSONDecodeError:
                        # Keep-alive/comment lines are not JSON — skip them.
                        continue
    except Exception as e:
        # Fallback to non-streaming if streaming fails for any reason
        # (connection error, HTTP error, malformed stream).
        yield call_llm_fallback(question, context, is_followup)
def call_llm_fallback(question: str, context: str, is_followup: bool = False) -> str:
    """Single-shot (non-streaming) OpenRouter call; returns the complete answer."""
    # Follow-ups get an elaboration-oriented instruction block and a slightly
    # larger token budget; the prompt text matches the streaming path exactly.
    if is_followup:
        instructions = (
            "You are a Non-QM glossary assistant.\n"
            "The user is asking for more details about a previous topic.\n"
            "Answer with additional information from the context.\n"
            "Keep it to 3 sentences max. Finish with this exact line:\n"
        )
        token_budget = 150
    else:
        instructions = (
            "You are a Non-QM glossary assistant.\n"
            "Answer the user only with information in the context.\n"
            "Two sentences max. Finish with this exact line:\n"
        )
        token_budget = 120
    prompt = f"{instructions}{DISCLAIMER}\n\nUser: {question}\nContext:\n{context}"
    payload = {
        "model": GPT_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": token_budget,
        "temperature": 0.3
    }
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "X-Title": "nonqm-glossary-bot"
        },
        json=payload,
        timeout=60  # generous timeout for OpenRouter stability
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"].strip()
# ----- Enhanced Gradio callback with conversation memory -----
def chat_fn(message, history):
    """Gradio chat callback: PII gate, context-aware retrieval, streamed answer."""
    # Compliance gate: refuse anything that looks like personal information.
    if contains_pii(message):
        yield "I cannot process messages containing personal information. Please ask about glossary terms only."
        return
    is_followup = detect_followup_question(message)
    # For follow-ups, carry the previous topic into retrieval so queries like
    # "tell me more" still land on the right glossary entries.
    conversation_context = None
    if is_followup and history:
        conversation_context = extract_last_topic(history)
    hits = retrieve(message, conversation_context)
    if not hits:
        if is_followup:
            yield "I don't have additional information on that topic in our glossary. Please ask a specific question about a Non-QM term, or contact a loan officer for more detailed assistance."
        else:
            yield "I'm not sure about that term. Please contact a loan officer for assistance with questions outside our glossary."
        return
    # Stream the LLM answer, re-yielding each accumulated partial.
    context = "\n---\n".join(hits)
    yield from call_llm_streaming(message, context, is_followup)
# ----- Custom CSS for enhanced aesthetics -----
# Soft Gradio theme tinted blue/purple to match the gradient branding used
# in the header and footer HTML below.
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="slate",
).set(
    # Page background and primary buttons share the same purple gradient.
    body_background_fill="linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
    block_background_fill="*neutral_50",
    button_primary_background_fill="linear-gradient(90deg, #667eea 0%, #764ba2 100%)",
    button_primary_background_fill_hover="linear-gradient(90deg, #5a6fd8 0%, #6a4190 100%)",
)
custom_css = """
.gradio-container {
max-width: 900px !important;
margin: auto !important;
border-radius: 15px !important;
box-shadow: 0 20px 40px rgba(0,0,0,0.1) !important;
}
.chat-message {
border-radius: 12px !important;
margin: 8px 0 !important;
padding: 12px !important;
}
.message-wrap {
max-width: 85% !important;
}
.user .message-wrap {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
color: white !important;
}
.bot .message-wrap {
background: #f8f9fa !important;
border: 1px solid #e9ecef !important;
}
.disclaimer {
font-style: italic !important;
color: #6c757d !important;
border-top: 1px solid #dee2e6 !important;
margin-top: 8px !important;
padding-top: 8px !important;
}
/* Typing animation for streaming */
@keyframes typing {
0% { opacity: 0.4; }
50% { opacity: 1; }
100% { opacity: 0.4; }
}
.streaming-text {
animation: typing 1.5s infinite;
}
"""
# ----- Enhanced UI -----
# Page layout: header banner, usage instructions, chat widget, compliance footer.
with gr.Blocks(theme=custom_theme, css=custom_css, title="Non-QM Glossary Assistant") as demo:
    # Branded header banner.
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 12px; margin-bottom: 20px;">
    <h1 style="margin: 0; font-size: 2.5em; font-weight: 700;">🏠 Non-QM Glossary Assistant</h1>
    <p style="margin: 10px 0 0 0; font-size: 1.2em; opacity: 0.95;">
    Get instant, accurate definitions of Non-Qualified Mortgage terms
    </p>
    </div>
    """)
    # Usage instructions and example questions.
    gr.Markdown("""
    ### 💬 How to Use This Assistant
    - **Ask about Non-QM mortgage terms** and receive clear, accurate definitions
    - **Ask follow-up questions** like "tell me more" or "can you elaborate" for additional details
    - Questions outside our glossary scope will be directed to a loan officer
    - All responses include required compliance disclaimers
    - **No personal information** should be shared in your questions
    **Example questions:**
    - "What is a Non-QM loan?"
    - "Define debt-to-income ratio"
    - "What does DSCR mean?"
    - "Explain asset-based lending"
    - "Tell me more about that" (after asking about a term)
    """)
    # Chat widget wired to chat_fn; type="messages" passes history as
    # [{"role": ..., "content": ...}] dicts (the shape extract_last_topic handles).
    chatbot = gr.ChatInterface(
        fn=chat_fn,
        title="Non-QM Glossary Assistant",
        description="Ask about Non-QM mortgage terms and get instant definitions. Follow-up questions welcome!",
        type="messages"
    )
    # Mandatory compliance notice shown beneath the chat.
    gr.HTML("""
    <div style="text-align: center; margin-top: 20px; padding: 20px; background: #dc3545; border: 2px solid #b02a37; border-radius: 12px; box-shadow: 0 4px 12px rgba(220, 53, 69, 0.3);">
    <p style="margin: 0; color: white; font-size: 1.1em; font-weight: 600; line-height: 1.4;">
    <strong>⚠️ IMPORTANT COMPLIANCE NOTICE:</strong><br><br>
    This assistant provides general information only and is NOT a commitment to lend.<br>
    For personalized advice, loan applications, or specific financial guidance,<br>
    please contact a qualified loan officer.
    </p>
    </div>
    """)
if __name__ == "__main__":
    # Launch the Gradio server (blocking call).
    demo.launch()