Spaces:
Sleeping
Sleeping
Upload 7 files
Browse filesAdded Basic functionalities
- .gitattributes +1 -0
- .gitignore +251 -0
- README.md +241 -12
- app.py +384 -0
- build_index.py +39 -0
- chunks.json +1 -0
- glossary.index +3 -0
- requirements.txt +7 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
glossary.index filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Spaces .gitignore
|
| 2 |
+
# Optimized for Non-QM Glossary Chatbot deployment
|
| 3 |
+
|
| 4 |
+
# ==========================================
|
| 5 |
+
# Environment and Configuration Files
|
| 6 |
+
# ==========================================
|
| 7 |
+
.env
|
| 8 |
+
.env.local
|
| 9 |
+
.env.production
|
| 10 |
+
.env.staging
|
| 11 |
+
config.json
|
| 12 |
+
secrets.json
|
| 13 |
+
|
| 14 |
+
# ==========================================
|
| 15 |
+
# Cline AI Assistant Files
|
| 16 |
+
# ==========================================
|
| 17 |
+
.clinerules
|
| 18 |
+
.cline/
|
| 19 |
+
memory-bank/
|
| 20 |
+
.claude/
|
| 21 |
+
.cursor/
|
| 22 |
+
|
| 23 |
+
# ==========================================
|
| 24 |
+
# Development Documentation
|
| 25 |
+
# ==========================================
|
| 26 |
+
designDoc.md
|
| 27 |
+
Basic_Design_Doc.docx
|
| 28 |
+
Glossary.pdf
|
| 29 |
+
README_dev.md
|
| 30 |
+
DEVELOPMENT.md
|
| 31 |
+
TODO.md
|
| 32 |
+
NOTES.md
|
| 33 |
+
|
| 34 |
+
# ==========================================
|
| 35 |
+
# Python
|
| 36 |
+
# ==========================================
|
| 37 |
+
__pycache__/
|
| 38 |
+
*.py[cod]
|
| 39 |
+
*$py.class
|
| 40 |
+
*.so
|
| 41 |
+
.Python
|
| 42 |
+
build/
|
| 43 |
+
develop-eggs/
|
| 44 |
+
dist/
|
| 45 |
+
downloads/
|
| 46 |
+
eggs/
|
| 47 |
+
.eggs/
|
| 48 |
+
lib/
|
| 49 |
+
lib64/
|
| 50 |
+
parts/
|
| 51 |
+
sdist/
|
| 52 |
+
var/
|
| 53 |
+
wheels/
|
| 54 |
+
share/python-wheels/
|
| 55 |
+
*.egg-info/
|
| 56 |
+
.installed.cfg
|
| 57 |
+
*.egg
|
| 58 |
+
MANIFEST
|
| 59 |
+
|
| 60 |
+
# ==========================================
|
| 61 |
+
# Virtual Environments
|
| 62 |
+
# ==========================================
|
| 63 |
+
venv/
|
| 64 |
+
env/
|
| 65 |
+
ENV/
|
| 66 |
+
env.bak/
|
| 67 |
+
venv.bak/
|
| 68 |
+
.venv/
|
| 69 |
+
.env/
|
| 70 |
+
.conda/
|
| 71 |
+
conda-meta/
|
| 72 |
+
|
| 73 |
+
# ==========================================
|
| 74 |
+
# IDEs and Editors
|
| 75 |
+
# ==========================================
|
| 76 |
+
.vscode/
|
| 77 |
+
.idea/
|
| 78 |
+
*.swp
|
| 79 |
+
*.swo
|
| 80 |
+
*~
|
| 81 |
+
.DS_Store
|
| 82 |
+
Thumbs.db
|
| 83 |
+
|
| 84 |
+
# VS Code
|
| 85 |
+
.vscode/settings.json
|
| 86 |
+
.vscode/tasks.json
|
| 87 |
+
.vscode/launch.json
|
| 88 |
+
.vscode/extensions.json
|
| 89 |
+
.vscode/cline_docs.md
|
| 90 |
+
|
| 91 |
+
# PyCharm
|
| 92 |
+
.idea/
|
| 93 |
+
*.iml
|
| 94 |
+
*.iws
|
| 95 |
+
|
| 96 |
+
# Sublime Text
|
| 97 |
+
*.sublime-project
|
| 98 |
+
*.sublime-workspace
|
| 99 |
+
|
| 100 |
+
# Vim
|
| 101 |
+
.vim/
|
| 102 |
+
*.swp
|
| 103 |
+
*.swo
|
| 104 |
+
|
| 105 |
+
# ==========================================
|
| 106 |
+
# OS Generated Files
|
| 107 |
+
# ==========================================
|
| 108 |
+
.DS_Store
|
| 109 |
+
.DS_Store?
|
| 110 |
+
._*
|
| 111 |
+
.Spotlight-V100
|
| 112 |
+
.Trashes
|
| 113 |
+
ehthumbs.db
|
| 114 |
+
Thumbs.db
|
| 115 |
+
desktop.ini
|
| 116 |
+
|
| 117 |
+
# ==========================================
|
| 118 |
+
# Logs and Databases
|
| 119 |
+
# ==========================================
|
| 120 |
+
*.log
|
| 121 |
+
logs/
|
| 122 |
+
log/
|
| 123 |
+
*.sqlite
|
| 124 |
+
*.sqlite3
|
| 125 |
+
*.db
|
| 126 |
+
|
| 127 |
+
# ==========================================
|
| 128 |
+
# Testing and Coverage
|
| 129 |
+
# ==========================================
|
| 130 |
+
.coverage
|
| 131 |
+
.pytest_cache/
|
| 132 |
+
.tox/
|
| 133 |
+
.nox/
|
| 134 |
+
coverage.xml
|
| 135 |
+
*.cover
|
| 136 |
+
*.py,cover
|
| 137 |
+
.hypothesis/
|
| 138 |
+
.cache
|
| 139 |
+
nosetests.xml
|
| 140 |
+
|
| 141 |
+
# ==========================================
|
| 142 |
+
# Jupyter Notebook
|
| 143 |
+
# ==========================================
|
| 144 |
+
.ipynb_checkpoints
|
| 145 |
+
*.ipynb
|
| 146 |
+
|
| 147 |
+
# ==========================================
|
| 148 |
+
# Model and Data Files (Exclude Large Files)
|
| 149 |
+
# ==========================================
|
| 150 |
+
# Note: We DO want to include these for our chatbot:
|
| 151 |
+
# - glossary.txt (source data)
|
| 152 |
+
# - glossary.index (FAISS index)
|
| 153 |
+
# - chunks.json (preprocessed data)
|
| 154 |
+
|
| 155 |
+
# But exclude any backup or temporary versions
|
| 156 |
+
*.bak
|
| 157 |
+
*.backup
|
| 158 |
+
*.tmp
|
| 159 |
+
*_backup.*
|
| 160 |
+
*_temp.*
|
| 161 |
+
|
| 162 |
+
# ==========================================
|
| 163 |
+
# Package Managers
|
| 164 |
+
# ==========================================
|
| 165 |
+
node_modules/
|
| 166 |
+
npm-debug.log*
|
| 167 |
+
yarn-debug.log*
|
| 168 |
+
yarn-error.log*
|
| 169 |
+
package-lock.json
|
| 170 |
+
yarn.lock
|
| 171 |
+
|
| 172 |
+
# ==========================================
|
| 173 |
+
# Security and Sensitive Files
|
| 174 |
+
# ==========================================
|
| 175 |
+
*.pem
|
| 176 |
+
*.key
|
| 177 |
+
*.cert
|
| 178 |
+
*.crt
|
| 179 |
+
id_rsa
|
| 180 |
+
id_dsa
|
| 181 |
+
.ssh/
|
| 182 |
+
.gnupg/
|
| 183 |
+
|
| 184 |
+
# ==========================================
|
| 185 |
+
# Temporary Files
|
| 186 |
+
# ==========================================
|
| 187 |
+
*.tmp
|
| 188 |
+
*.temp
|
| 189 |
+
temp/
|
| 190 |
+
tmp/
|
| 191 |
+
.cache/
|
| 192 |
+
.temp/
|
| 193 |
+
|
| 194 |
+
# ==========================================
|
| 195 |
+
# Backup Files
|
| 196 |
+
# ==========================================
|
| 197 |
+
*.orig
|
| 198 |
+
*.bak
|
| 199 |
+
*.backup
|
| 200 |
+
*~
|
| 201 |
+
#*#
|
| 202 |
+
|
| 203 |
+
# ==========================================
|
| 204 |
+
# Hugging Face Specific
|
| 205 |
+
# ==========================================
|
| 206 |
+
.gradio/
|
| 207 |
+
gradio_cached_examples/
|
| 208 |
+
.hf_token
|
| 209 |
+
hf_token.txt
|
| 210 |
+
|
| 211 |
+
# ==========================================
|
| 212 |
+
# Git
|
| 213 |
+
# ==========================================
|
| 214 |
+
.git/
|
| 215 |
+
.gitignore_local
|
| 216 |
+
.gitconfig_local
|
| 217 |
+
|
| 218 |
+
# ==========================================
|
| 219 |
+
# Local Development Scripts
|
| 220 |
+
# ==========================================
|
| 221 |
+
run_local.py
|
| 222 |
+
test_local.py
|
| 223 |
+
debug.py
|
| 224 |
+
local_test.sh
|
| 225 |
+
dev_setup.sh
|
| 226 |
+
|
| 227 |
+
# ==========================================
|
| 228 |
+
# Documentation Build Files
|
| 229 |
+
# ==========================================
|
| 230 |
+
docs/_build/
|
| 231 |
+
docs/build/
|
| 232 |
+
site/
|
| 233 |
+
.readthedocs.yml
|
| 234 |
+
|
| 235 |
+
# ==========================================
|
| 236 |
+
# Performance and Profiling
|
| 237 |
+
# ==========================================
|
| 238 |
+
.prof
|
| 239 |
+
*.prof
|
| 240 |
+
.benchmark
|
| 241 |
+
profile_output
|
| 242 |
+
|
| 243 |
+
# END: Paths matching the patterns above are excluded from the Hugging Face Spaces repository
|
| 244 |
+
#
|
| 245 |
+
# INCLUDED FILES (should be committed):
|
| 246 |
+
# - app.py (main application)
|
| 247 |
+
# - requirements.txt (dependencies)
|
| 248 |
+
# - glossary.txt (source data)
|
| 249 |
+
# - glossary.index (FAISS vector index)
|
| 250 |
+
# - chunks.json (preprocessed data)
|
| 251 |
+
# - build_index.py (for maintenance)
|
README.md
CHANGED
|
@@ -1,12 +1,241 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Non-QM Glossary Chatbot
|
| 2 |
+
|
| 3 |
+
A professional RAG-powered chatbot that provides instant, accurate definitions of Non-Qualified Mortgage terms with strict compliance controls and conversation memory.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
|
| 7 |
+
- 🏠 **Non-QM Expertise**: Specialized glossary of mortgage terminology
|
| 8 |
+
- 💬 **Conversation Memory**: Smart follow-up question handling
|
| 9 |
+
- 🔒 **Compliance First**: Built-in disclaimers and PII protection
|
| 10 |
+
- ⚡ **Streaming Responses**: Real-time text generation
|
| 11 |
+
- 🎨 **Professional UI**: Modern Gradio interface with custom styling
|
| 12 |
+
- 💰 **Cost Efficient**: Optimized for <$10/month operation
|
| 13 |
+
|
| 14 |
+
## Prerequisites
|
| 15 |
+
|
| 16 |
+
- Python 3.8 or higher
|
| 17 |
+
- OpenAI API key (for embeddings)
|
| 18 |
+
- OpenRouter API key (for Gemini LLM access)
|
| 19 |
+
|
| 20 |
+
## Installation
|
| 21 |
+
|
| 22 |
+
1. **Clone the repository:**
|
| 23 |
+
```bash
|
| 24 |
+
git clone <repository-url>
|
| 25 |
+
cd ChatBot
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
2. **Create and activate a virtual environment:**
|
| 29 |
+
```bash
|
| 30 |
+
python -m venv venv
|
| 31 |
+
|
| 32 |
+
# On Windows:
|
| 33 |
+
venv\Scripts\activate
|
| 34 |
+
|
| 35 |
+
# On macOS/Linux:
|
| 36 |
+
source venv/bin/activate
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
3. **Install dependencies:**
|
| 40 |
+
```bash
|
| 41 |
+
pip install -r requirements.txt
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
## API Key Setup
|
| 45 |
+
|
| 46 |
+
### 1. OpenAI API Key
|
| 47 |
+
1. Go to [OpenAI API Keys](https://platform.openai.com/api-keys)
|
| 48 |
+
2. Create a new API key
|
| 49 |
+
3. Copy the key (starts with `sk-proj-...`)
|
| 50 |
+
|
| 51 |
+
### 2. OpenRouter API Key
|
| 52 |
+
1. Go to [OpenRouter Keys](https://openrouter.ai/keys)
|
| 53 |
+
2. Create a new API key
|
| 54 |
+
3. Copy the key (starts with `sk-or-...`)
|
| 55 |
+
|
| 56 |
+
### 3. Environment Configuration
|
| 57 |
+
|
| 58 |
+
Create a `.env` file in the project root:
|
| 59 |
+
|
| 60 |
+
```bash
|
| 61 |
+
# Create .env file
|
| 62 |
+
touch .env
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
Add your API keys to the `.env` file:
|
| 66 |
+
|
| 67 |
+
```env
|
| 68 |
+
OPENAI_API_KEY=sk-proj-your-openai-key-here
|
| 69 |
+
OPENROUTER_API_KEY=sk-or-your-openrouter-key-here
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
⚠️ **Important:** Never commit your `.env` file to version control. It's already included in `.gitignore`.
|
| 73 |
+
|
| 74 |
+
## Running the Application
|
| 75 |
+
|
| 76 |
+
### 1. Generate Vector Index (First Time Only)
|
| 77 |
+
|
| 78 |
+
Before running the chatbot for the first time, generate the search index:
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
python build_index.py
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
This creates:
|
| 85 |
+
- `glossary.index` - FAISS vector search index
|
| 86 |
+
- `chunks.json` - Text chunks metadata
|
| 87 |
+
|
| 88 |
+
### 2. Start the Chatbot
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
python app.py
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
The application will start and display:
|
| 95 |
+
```
|
| 96 |
+
Running on local URL: http://127.0.0.1:7860
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
### 3. Access the Interface
|
| 100 |
+
|
| 101 |
+
Open your browser and go to: `http://127.0.0.1:7860`
|
| 102 |
+
|
| 103 |
+
## Usage
|
| 104 |
+
|
| 105 |
+
### Basic Questions
|
| 106 |
+
Ask about Non-QM mortgage terms:
|
| 107 |
+
- "What is a Non-QM loan?"
|
| 108 |
+
- "Define debt-to-income ratio"
|
| 109 |
+
- "What does DSCR mean?"
|
| 110 |
+
- "Explain asset-based lending"
|
| 111 |
+
|
| 112 |
+
### Follow-up Questions
|
| 113 |
+
The chatbot remembers conversation context:
|
| 114 |
+
- After asking about a term, say "tell me more"
|
| 115 |
+
- "Can you elaborate on that?"
|
| 116 |
+
- "Give me more details"
|
| 117 |
+
|
| 118 |
+
### What NOT to Ask
|
| 119 |
+
- Personal financial information
|
| 120 |
+
- Rate quotes or loan applications
|
| 121 |
+
- Questions outside the glossary scope
|
| 122 |
+
|
| 123 |
+
## Project Structure
|
| 124 |
+
|
| 125 |
+
```
|
| 126 |
+
ChatBot/
|
| 127 |
+
├── app.py # Main Gradio application
|
| 128 |
+
├── build_index.py # Vector index generation
|
| 129 |
+
├── requirements.txt # Python dependencies
|
| 130 |
+
├── glossary.txt # Source glossary content
|
| 131 |
+
├── glossary.index # Generated FAISS index (after build)
|
| 132 |
+
├── chunks.json # Generated text chunks (after build)
|
| 133 |
+
├── .env # API keys (create this file)
|
| 134 |
+
├── .gitignore # Files to exclude from git
|
| 135 |
+
└── memory-bank/ # Project documentation (local only; excluded by .gitignore)
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
## Configuration
|
| 139 |
+
|
| 140 |
+
Key settings in `app.py`:
|
| 141 |
+
|
| 142 |
+
```python
|
| 143 |
+
EMBED_MODEL = "text-embedding-3-small" # OpenAI embeddings
|
| 144 |
+
GPT_MODEL = "google/gemini-2.5-flash-preview-05-20" # OpenRouter LLM
|
| 145 |
+
SIM_THRESHOLD = 0.30 # Similarity threshold
|
| 146 |
+
TOP_K = 3 # Number of chunks to retrieve
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
## Deployment
|
| 150 |
+
|
| 151 |
+
### Hugging Face Spaces
|
| 152 |
+
|
| 153 |
+
1. **Create a new Space:**
|
| 154 |
+
- Go to [Hugging Face Spaces](https://huggingface.co/spaces)
|
| 155 |
+
- Choose Gradio SDK
|
| 156 |
+
- Set hardware to CPU Basic (free)
|
| 157 |
+
|
| 158 |
+
2. **Upload required files:**
|
| 159 |
+
```
|
| 160 |
+
app.py
|
| 161 |
+
requirements.txt
|
| 162 |
+
glossary.txt
|
| 163 |
+
glossary.index
|
| 164 |
+
chunks.json
|
| 165 |
+
build_index.py
|
| 166 |
+
```
|
| 167 |
+
|
| 168 |
+
3. **Configure secrets in HF Spaces:**
|
| 169 |
+
- Go to Settings → Variables and Secrets
|
| 170 |
+
- Add `OPENAI_API_KEY`
|
| 171 |
+
- Add `OPENROUTER_API_KEY`
|
| 172 |
+
|
| 173 |
+
4. **Deploy:**
|
| 174 |
+
- Push files to the Space repository
|
| 175 |
+
- The app will automatically build and deploy
|
| 176 |
+
|
| 177 |
+
## Maintenance
|
| 178 |
+
|
| 179 |
+
### Updating the Glossary
|
| 180 |
+
|
| 181 |
+
1. Edit `glossary.txt` with new terms
|
| 182 |
+
2. Regenerate the index:
|
| 183 |
+
```bash
|
| 184 |
+
python build_index.py
|
| 185 |
+
```
|
| 186 |
+
3. Restart the application
|
| 187 |
+
|
| 188 |
+
### Cost Monitoring
|
| 189 |
+
|
| 190 |
+
- **OpenAI**: ~$0.0001 per query (embeddings)
|
| 191 |
+
- **OpenRouter**: ~$0.005 per response (Gemini)
|
| 192 |
+
- **Target**: <$10/month total operation
|
| 193 |
+
|
| 194 |
+
### Troubleshooting
|
| 195 |
+
|
| 196 |
+
**Common Issues:**
|
| 197 |
+
|
| 198 |
+
1. **"Module not found" error:**
|
| 199 |
+
```bash
|
| 200 |
+
pip install -r requirements.txt
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
2. **"No such file" for index files:**
|
| 204 |
+
```bash
|
| 205 |
+
python build_index.py
|
| 206 |
+
```
|
| 207 |
+
|
| 208 |
+
3. **API key errors:**
|
| 209 |
+
- Check `.env` file exists and has correct keys
|
| 210 |
+
- Verify API keys are valid and have sufficient credits
|
| 211 |
+
|
| 212 |
+
4. **Import errors:**
|
| 213 |
+
```bash
|
| 214 |
+
pip install faiss-cpu numpy openai requests gradio python-dotenv
|
| 215 |
+
```
|
| 216 |
+
|
| 217 |
+
## Compliance Features
|
| 218 |
+
|
| 219 |
+
- **Automatic Disclaimers**: Every response includes required compliance text
|
| 220 |
+
- **PII Detection**: Blocks emails, SSNs, and credit score references
|
| 221 |
+
- **Scope Limiting**: Only answers questions about glossary terms
|
| 222 |
+
- **Session Memory**: Context resets when chat is cleared (no persistent data)
|
| 223 |
+
|
| 224 |
+
## Security
|
| 225 |
+
|
| 226 |
+
- API keys stored in environment variables
|
| 227 |
+
- No user data persistence
|
| 228 |
+
- Input sanitization and validation
|
| 229 |
+
- PII detection and rejection
|
| 230 |
+
|
| 231 |
+
## Support
|
| 232 |
+
|
| 233 |
+
For technical issues:
|
| 234 |
+
1. Check the troubleshooting section above
|
| 235 |
+
2. Verify all dependencies are installed
|
| 236 |
+
3. Ensure API keys are correctly configured
|
| 237 |
+
4. Check that vector index files exist
|
| 238 |
+
|
| 239 |
+
## License
|
| 240 |
+
|
| 241 |
+
This project is intended for internal, compliance-focused use and is subject to strict business requirements.
|
app.py
ADDED
|
@@ -0,0 +1,384 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import faiss
|
| 4 |
+
import numpy as np
|
| 5 |
+
import requests
|
| 6 |
+
import gradio as gr
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
import openai
|
| 9 |
+
import re
|
| 10 |
+
import time
|
| 11 |
+
|
| 12 |
+
# ---------- config ----------
|
| 13 |
+
EMBED_MODEL = "text-embedding-3-small" # OpenAI
|
| 14 |
+
GPT_MODEL = "google/gemini-2.5-flash-preview-05-20" # OpenRouter
|
| 15 |
+
SIM_THRESHOLD = 0.30 # tweak if recall is poor
|
| 16 |
+
TOP_K = 3
|
| 17 |
+
DISCLAIMER = "General info only, not a commitment to lend."
|
| 18 |
+
# ----------------------------
|
| 19 |
+
|
| 20 |
+
load_dotenv()
|
| 21 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 22 |
+
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
|
| 23 |
+
|
| 24 |
+
# ----- load glossary vectors -----
|
| 25 |
+
with open("chunks.json", encoding="utf8") as f:
|
| 26 |
+
CHUNKS = json.load(f)
|
| 27 |
+
|
| 28 |
+
INDEX = faiss.read_index("glossary.index")
|
| 29 |
+
|
| 30 |
+
# ----- PII detection (compliance requirement) -----
|
| 31 |
+
def contains_pii(text: str) -> bool:
    """Return True when *text* appears to contain personal information.

    Three kinds of PII are screened for (compliance requirement):

    * e-mail addresses,
    * US Social Security numbers (``123-45-6789`` or nine bare digits),
    * credit scores, i.e. a three-digit number in the 300-899 range that
      appears close to the word "score" or "FICO" (either order,
      case-insensitive).

    A bare three-digit number such as the one in "Form 433-A" is NOT
    treated as a credit score; the surrounding context is required so that
    ordinary glossary questions containing numbers are not rejected.
    """
    # TLD class is letters only; the previous ``[A-Z|a-z]`` also accepted "|".
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    ssn_pattern = r'\b\d{3}-?\d{2}-?\d{4}\b'
    # Number followed by "score"/"fico" within the same sentence fragment,
    # or "score"/"fico" followed by the number. The 20-character window keeps
    # the match local so unrelated numbers later in the text are not flagged.
    credit_pattern = (
        r'\b[3-8]\d{2}\b(?=[^.?!\n]{0,20}\b(?:score|fico)\b)'
        r'|\b(?:score|fico)\b[^.?!\n]{0,20}?\b[3-8]\d{2}\b'
    )

    return bool(
        re.search(email_pattern, text)
        or re.search(ssn_pattern, text)
        or re.search(credit_pattern, text, re.IGNORECASE)
    )
|
| 41 |
+
|
| 42 |
+
# ----- conversation memory helpers -----
|
| 43 |
+
def detect_followup_question(question: str) -> bool:
    """Report whether *question* reads like a follow-up to an earlier answer.

    The question is lower-cased and tested against a fixed set of cue
    patterns (requests to elaborate, "what about ...", pronoun references
    ending in a question mark, and openers such as "more" or "also").
    Returns True as soon as any cue matches, otherwise False.
    """
    lowered = question.lower()
    cues = (
        r'\b(elaborate|expand|explain more|tell me more|more details|further|additionally)\b',
        r'\b(can you|could you|would you).*(more|further|elaborate|expand)\b',
        r'\b(what about|how about|what else)\b',
        r'\b(that|this|it)\b.*\?',  # pronoun pointing back at the previous topic
        r'^\s*(more|further|additionally|also)\b',
        r'\b(give me more|tell me more|say more)\b',
    )
    for cue in cues:
        if re.search(cue, lowered):
            return True
    return False
|
| 56 |
+
|
| 57 |
+
def extract_last_topic(history):
    """Return the main glossary term mentioned in the latest bot reply.

    Args:
        history: Chat history as handed over by the UI. The last entry may
            be a message dict with a ``'content'`` key, or a
            ``[user, bot]`` / ``(user, bot)`` pair.

    Returns:
        The first capitalised term (longer than three characters) in the
        reply that contains one of the Non-QM keywords, or ``None`` when
        there is no history, the last entry has an unexpected shape, the
        reply is not text, or no such term is found.
    """
    if not history:
        return None

    # Pull the text of the most recent bot response out of either format.
    last_exchange = history[-1]
    if isinstance(last_exchange, dict):
        last_response = last_exchange.get('content')
    elif isinstance(last_exchange, (list, tuple)) and len(last_exchange) >= 2:
        last_response = last_exchange[1]  # bot side of the pair
    else:
        return None

    # A pending/empty reply (None) or non-text content carries no topic.
    if not isinstance(last_response, str):
        return None

    # Only look at the answer itself, not the trailing compliance disclaimer.
    if DISCLAIMER in last_response:
        content = last_response.split(DISCLAIMER)[0].strip()
    else:
        content = last_response

    # Candidate topics: runs of capitalised words, e.g. "DSCR Loan".
    terms = re.findall(r'\b[A-Z][A-Za-z-]+(?:\s+[A-Z][A-Za-z-]+)*\b', content)
    nqm_keywords = ('non-qm', 'dscr', 'dti', 'income', 'ratio', 'loan', 'mortgage', 'lending')

    for term in terms:
        lowered = term.lower()
        if len(term) > 3 and any(keyword in lowered for keyword in nqm_keywords):
            return term

    return None
|
| 87 |
+
|
| 88 |
+
# ----- helpers -----
|
| 89 |
+
def embed(text: str) -> np.ndarray:
    """Embed *text* with the OpenAI embeddings endpoint.

    Requests a single embedding using ``EMBED_MODEL`` and returns it as a
    1-D float32 numpy vector that has been L2-normalised.
    """
    response = openai.embeddings.create(model=EMBED_MODEL, input=[text])
    embedding = np.array(response.data[0].embedding, dtype="float32")
    # normalize_L2 works in place on the 2-D array it is given; the reshape of
    # this freshly built 1-D array is a view, so `embedding` is normalised too.
    faiss.normalize_L2(embedding.reshape(1, -1))
    return embedding
|
| 99 |
+
|
| 100 |
+
def _search_chunks(query: str):
    """Embed *query*, search the index and return chunks scoring >= SIM_THRESHOLD."""
    vec = embed(query).reshape(1, -1)
    scores, ids = INDEX.search(vec, TOP_K)
    return [
        CHUNKS[i]
        for i, s in zip(ids[0], scores[0])
        # FAISS pads missing results with id -1; never let that index CHUNKS[-1].
        if i >= 0 and s >= SIM_THRESHOLD
    ]


def retrieve(question: str, conversation_context: str = None):
    """Return glossary chunks relevant to *question*.

    Args:
        question: The user's message.
        conversation_context: Optional topic from the previous exchange.
            It is prepended to the search query only when *question* looks
            like a follow-up (see ``detect_followup_question``).

    Returns:
        A list of up to ``TOP_K`` chunks whose similarity score is at least
        ``SIM_THRESHOLD``; empty when nothing qualifies. If the
        context-augmented query finds nothing, the bare question is tried
        once more.
    """
    search_query = question
    if conversation_context and detect_followup_question(question):
        search_query = f"{conversation_context} {question}"

    relevant_chunks = _search_chunks(search_query)

    # Retry with the bare question only when the first query actually differed;
    # repeating an identical query would just pay for a second embedding call.
    if not relevant_chunks and search_query != question:
        relevant_chunks = _search_chunks(question)

    return relevant_chunks
|
| 127 |
+
|
| 128 |
+
def _build_llm_request(question: str, context: str, is_followup: bool):
    """Return ``(prompt, max_tokens)`` for a first-time or follow-up question."""
    if is_followup:
        instructions = (
            "The user is asking for more details about a previous topic.\n"
            "Answer with additional information from the context.\n"
            "Keep it to 3 sentences max. Finish with this exact line:\n"
        )
        max_tokens = 150  # Allow slightly more for elaboration
    else:
        instructions = (
            "Answer the user only with information in the context.\n"
            "Two sentences max. Finish with this exact line:\n"
        )
        max_tokens = 120

    prompt = (
        "You are a Non-QM glossary assistant.\n"
        f"{instructions}"
        f"{DISCLAIMER}\n\n"
        f"User: {question}\n"
        f"Context:\n{context}"
    )
    return prompt, max_tokens


def _post_chat_completion(prompt: str, max_tokens: int, *, stream: bool):
    """POST a single-message chat completion to OpenRouter and return the response."""
    payload = {
        "model": GPT_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": 0.3,
    }
    if stream:
        payload["stream"] = True

    return requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "X-Title": "nonqm-glossary-bot",
        },
        json=payload,
        timeout=60,  # Increased timeout for OpenRouter stability
        stream=stream,
    )


def call_llm_streaming(question: str, context: str, is_followup: bool = False):
    """Stream the LLM answer, yielding the accumulated text after each chunk.

    Args:
        question: The user's message.
        context: Retrieved glossary text the answer must be based on.
        is_followup: Use the follow-up prompt (3 sentences, 150 tokens)
            instead of the default one (2 sentences, 120 tokens).

    Yields:
        The full response text received so far. If streaming fails for any
        reason, a single complete answer from ``call_llm_fallback`` is
        yielded instead; errors raised by the fallback propagate.
    """
    import logging

    prompt, max_tokens = _build_llm_request(question, context, is_followup)

    try:
        # The context manager releases the connection even if the consumer
        # stops iterating early or an error interrupts the stream.
        with _post_chat_completion(prompt, max_tokens, stream=True) as resp:
            resp.raise_for_status()

            accumulated_text = ""
            for raw_line in resp.iter_lines():
                if not raw_line:
                    continue
                line = raw_line.decode('utf-8')
                # Only "data: ..." lines carry payloads; anything else
                # (e.g. keep-alive comments) is skipped.
                if not line.startswith('data: '):
                    continue
                payload = line[6:]
                if payload.strip() == '[DONE]':
                    break
                try:
                    data = json.loads(payload)
                except json.JSONDecodeError:
                    continue
                if not isinstance(data, dict):
                    continue
                choices = data.get('choices') or []
                if not choices:
                    continue
                delta = choices[0].get('delta') or {}
                content = delta.get('content')
                # Chunks may carry null/empty content (e.g. role-only deltas);
                # skip them instead of failing on str + None.
                if content:
                    accumulated_text += content
                    yield accumulated_text
                    time.sleep(0.02)  # Small delay for smooth streaming
    except Exception:
        # Deliberate best-effort boundary: record why streaming failed, then
        # fall back to a single non-streaming request.
        logging.getLogger(__name__).warning(
            "Streaming LLM call failed; using non-streaming fallback", exc_info=True
        )
        yield call_llm_fallback(question, context, is_followup)


def call_llm_fallback(question: str, context: str, is_followup: bool = False) -> str:
    """Return the complete LLM answer from one non-streaming request.

    Uses the same prompt and token limits as ``call_llm_streaming``.

    Raises:
        requests.HTTPError: If OpenRouter responds with an error status.
    """
    prompt, max_tokens = _build_llm_request(question, context, is_followup)
    resp = _post_chat_completion(prompt, max_tokens, stream=False)
    resp.raise_for_status()
    content = resp.json()["choices"][0]["message"]["content"]
    # The API may return null content; treat that as an empty answer.
    return (content or "").strip()
|
| 239 |
+
|
| 240 |
+
# ----- Enhanced Gradio callback with conversation memory -----
|
| 241 |
+
def chat_fn(message, history):
    """Gradio chat callback: screen the message, retrieve context, stream a reply.

    This is a generator. It yields either one canned refusal / "not found"
    string, or the successive values produced by ``call_llm_streaming``.

    Args:
        message: The text the user just submitted.
        history: Prior conversation turns as supplied by Gradio; only
            consulted when the message is detected as a follow-up.
    """
    # PII detection (compliance requirement): refuse before anything else runs.
    if contains_pii(message):
        yield "I cannot process messages containing personal information. Please ask about glossary terms only."
        return

    is_followup = detect_followup_question(message)

    # A follow-up can borrow the previous topic to sharpen retrieval, but only
    # if there is history to read and a topic could actually be extracted.
    conversation_context = None
    if is_followup and history:
        conversation_context = extract_last_topic(history) or None

    if conversation_context:
        hits = retrieve(message, conversation_context)
    else:
        hits = retrieve(message)

    # Nothing relevant in the glossary: answer with the matching canned reply.
    if not hits:
        if is_followup:
            yield "I don't have additional information on that topic in our glossary. Please ask a specific question about a Non-QM term, or contact a loan officer for more detailed assistance."
        else:
            yield "I'm not sure about that term. Please contact a loan officer for assistance with questions outside our glossary."
        return

    # Join the retrieved chunks and relay the streamed response as it arrives.
    context = "\n---\n".join(hits)
    yield from call_llm_streaming(message, context, is_followup)
|
| 276 |
+
|
| 277 |
+
# ----- Custom CSS for enhanced aesthetics -----
|
| 278 |
+
# Start from Gradio's Soft theme using the app's blue / gray / slate palette.
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="slate",
)
# Then override individual theme variables: gradient page background and
# primary buttons (slightly darker gradient on hover), light neutral blocks.
custom_theme = custom_theme.set(
    body_background_fill="linear-gradient(135deg, #667eea 0%, #764ba2 100%)",
    block_background_fill="*neutral_50",
    button_primary_background_fill="linear-gradient(90deg, #667eea 0%, #764ba2 100%)",
    button_primary_background_fill_hover="linear-gradient(90deg, #5a6fd8 0%, #6a4190 100%)",
)
|
| 288 |
+
|
| 289 |
+
custom_css = """
|
| 290 |
+
.gradio-container {
|
| 291 |
+
max-width: 900px !important;
|
| 292 |
+
margin: auto !important;
|
| 293 |
+
border-radius: 15px !important;
|
| 294 |
+
box-shadow: 0 20px 40px rgba(0,0,0,0.1) !important;
|
| 295 |
+
}
|
| 296 |
+
|
| 297 |
+
.chat-message {
|
| 298 |
+
border-radius: 12px !important;
|
| 299 |
+
margin: 8px 0 !important;
|
| 300 |
+
padding: 12px !important;
|
| 301 |
+
}
|
| 302 |
+
|
| 303 |
+
.message-wrap {
|
| 304 |
+
max-width: 85% !important;
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
.user .message-wrap {
|
| 308 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
| 309 |
+
color: white !important;
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
.bot .message-wrap {
|
| 313 |
+
background: #f8f9fa !important;
|
| 314 |
+
border: 1px solid #e9ecef !important;
|
| 315 |
+
}
|
| 316 |
+
|
| 317 |
+
.disclaimer {
|
| 318 |
+
font-style: italic !important;
|
| 319 |
+
color: #6c757d !important;
|
| 320 |
+
border-top: 1px solid #dee2e6 !important;
|
| 321 |
+
margin-top: 8px !important;
|
| 322 |
+
padding-top: 8px !important;
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
+
/* Typing animation for streaming */
|
| 326 |
+
@keyframes typing {
|
| 327 |
+
0% { opacity: 0.4; }
|
| 328 |
+
50% { opacity: 1; }
|
| 329 |
+
100% { opacity: 0.4; }
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
.streaming-text {
|
| 333 |
+
animation: typing 1.5s infinite;
|
| 334 |
+
}
|
| 335 |
+
"""
|
| 336 |
+
|
| 337 |
+
# ----- Enhanced UI -----
|
| 338 |
+
with gr.Blocks(theme=custom_theme, css=custom_css, title="Non-QM Glossary Assistant") as demo:
|
| 339 |
+
gr.HTML("""
|
| 340 |
+
<div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 12px; margin-bottom: 20px;">
|
| 341 |
+
<h1 style="margin: 0; font-size: 2.5em; font-weight: 700;">🏠 Non-QM Glossary Assistant</h1>
|
| 342 |
+
<p style="margin: 10px 0 0 0; font-size: 1.2em; opacity: 0.95;">
|
| 343 |
+
Get instant, accurate definitions of Non-Qualified Mortgage terms
|
| 344 |
+
</p>
|
| 345 |
+
</div>
|
| 346 |
+
""")
|
| 347 |
+
|
| 348 |
+
gr.Markdown("""
|
| 349 |
+
### 💬 How to Use This Assistant
|
| 350 |
+
|
| 351 |
+
- **Ask about Non-QM mortgage terms** and receive clear, accurate definitions
|
| 352 |
+
- **Ask follow-up questions** like "tell me more" or "can you elaborate" for additional details
|
| 353 |
+
- Questions outside our glossary scope will be directed to a loan officer
|
| 354 |
+
- All responses include required compliance disclaimers
|
| 355 |
+
- **No personal information** should be shared in your questions
|
| 356 |
+
|
| 357 |
+
**Example questions:**
|
| 358 |
+
- "What is a Non-QM loan?"
|
| 359 |
+
- "Define debt-to-income ratio"
|
| 360 |
+
- "What does DSCR mean?"
|
| 361 |
+
- "Explain asset-based lending"
|
| 362 |
+
- "Tell me more about that" (after asking about a term)
|
| 363 |
+
""")
|
| 364 |
+
|
| 365 |
+
chatbot = gr.ChatInterface(
|
| 366 |
+
fn=chat_fn,
|
| 367 |
+
title="Non-QM Glossary Assistant",
|
| 368 |
+
description="Ask about Non-QM mortgage terms and get instant definitions. Follow-up questions welcome!",
|
| 369 |
+
type="messages"
|
| 370 |
+
)
|
| 371 |
+
|
| 372 |
+
gr.HTML("""
|
| 373 |
+
<div style="text-align: center; margin-top: 20px; padding: 20px; background: #dc3545; border: 2px solid #b02a37; border-radius: 12px; box-shadow: 0 4px 12px rgba(220, 53, 69, 0.3);">
|
| 374 |
+
<p style="margin: 0; color: white; font-size: 1.1em; font-weight: 600; line-height: 1.4;">
|
| 375 |
+
<strong>⚠️ IMPORTANT COMPLIANCE NOTICE:</strong><br><br>
|
| 376 |
+
This assistant provides general information only and is NOT a commitment to lend.<br>
|
| 377 |
+
For personalized advice, loan applications, or specific financial guidance,<br>
|
| 378 |
+
please contact a qualified loan officer.
|
| 379 |
+
</p>
|
| 380 |
+
</div>
|
| 381 |
+
""")
|
| 382 |
+
|
| 383 |
+
if __name__ == "__main__":
|
| 384 |
+
demo.launch()
|
build_index.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Build the FAISS index and chunk list used by the glossary bot.

Reads ``glossary.txt``, splits it into blank-line-separated chunks, embeds
every chunk with the OpenAI embeddings API, and writes two files that must
stay aligned position-by-position: ``glossary.index`` (vectors) and
``chunks.json`` (the chunk texts).
"""
import json
from pathlib import Path

import faiss
import numpy as np
import openai
from dotenv import load_dotenv

# ---------- setup ----------
load_dotenv()  # pulls OPENAI_API_KEY from .env
client = openai.OpenAI()

TXT_FILE = "glossary.txt"
OUT_INDEX = "glossary.index"
OUT_CHUNKS = "chunks.json"
EMBED_MODEL = "text-embedding-3-small"
EMBED_BATCH_SIZE = 256  # inputs per request, so a large glossary is not sent in one call
# ----------------------------

# ---------- load + chunk ----------
txt = Path(TXT_FILE).read_text(encoding="utf8")
chunks = [c.strip() for c in txt.split("\n\n") if c.strip()]
if not chunks:
    # Fail early with a readable message instead of an API or shape error.
    raise SystemExit(f"{TXT_FILE} contains no text chunks; nothing to index.")


# ---------- embed ----------
def embed(texts, batch_size=EMBED_BATCH_SIZE):
    """Return one embedding (list of floats) per entry of *texts*, in order.

    Args:
        texts: Sequence of strings to embed.
        batch_size: Maximum number of strings sent per API request.

    Returns:
        A list of embeddings, one per input, in input order.
    """
    vectors = []
    for start in range(0, len(texts), batch_size):
        res = client.embeddings.create(
            model=EMBED_MODEL,
            input=texts[start:start + batch_size],
        )
        # Each result carries its position within the request; sort on it so
        # the vectors line up with the inputs regardless of response order.
        ordered = sorted(res.data, key=lambda d: d.index)
        vectors.extend(d.embedding for d in ordered)
    return vectors


vecs = np.array(embed(chunks), dtype="float32")
if vecs.ndim != 2 or vecs.shape[0] != len(chunks):
    # The index and chunks.json are looked up by the same position, so a
    # count mismatch would silently return the wrong text at query time.
    raise SystemExit(
        f"Expected {len(chunks)} embeddings, got array of shape {vecs.shape}."
    )
faiss.normalize_L2(vecs)  # cosine similarity wants unit vectors

# ---------- build index ----------
dim = vecs.shape[1]
index = faiss.IndexFlatIP(dim)  # inner product == cosine when vectors norm-1
index.add(vecs)

# ---------- save ----------
faiss.write_index(index, OUT_INDEX)
Path(OUT_CHUNKS).write_text(json.dumps(chunks, ensure_ascii=False), encoding="utf8")

print(f"Built {index.ntotal} vectors → {OUT_INDEX}")
|
chunks.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["# NonQM Glossary \\& Scenarios", "Last edited by Dhruv Ratra (Polaris24) 5 days ago A", "| Term | Definition | Synonyms / Aliases |\n| --- | --- | --- |\n| Ability-to-Repay\n(ATR) | Federal rule (TILA § 1026.43) requiring lenders to make a reasonable determination that the\nborrower can repay the loan. Non-QM loans must still satisfy ATR, even though they don't\nmeet the \"Qualified Mortgage\" safe-harbor tests. | ATR Rule |\n| Asset-Depletion\nLoan | Underwriting method that treats a borrower's liquid assets (bank, brokerage, retirement\naccounts) as imputed income to meet ATR. | Asset Utilization,\nAsset-Qualifier |\n| Automatic Value\nModel (AVM) | Computer-generated estimate of property value, often used for DSCR or portfolio reviews\nwhen a full appraisal isn't required. | |", "B", "| Term | Definition | Synonyms / Aliases |\n| --- | --- | --- |\n| Bank-Statement Loan | Loan qualified primarily on 12-24 months of personal or business bank statements\ninstead of tax returns. Common for self-employed borrowers. | BS Loan, Alt-Doc Bank\nStatement |\n| Blanket Loan | Single mortgage that covers multiple properties or units. Useful for portfolio\ninvestors. | Portfolio Loan |\n| Borrower Paid\nCompensation (BPC) | When the borrower, not the lender, pays the mortgage broker's commission. | |", "C", "| Term | Definition | Synonyms /\nAliases |\n| --- | --- | --- |\n| Cash-Flow Coverage Ratio | See Debt-Service-Coverage Ratio (DSCR). | |\n| CLTV (Combined Loan-to-Value) | Total liens on the property + appraised value. Includes first, second, and HELOC\nbalances. | |\n| Credit Event Seasoning | Elapsed time since negative credit events (BK, foreclosure, short sale).\nMeasured in months. | Seasoning Period |\n| Credit Score\n(FICO®/VantageScore®) | Numeric measure of credit risk. Many Non-QM programs go as low as 600-620. 
| FICO, Beacon\nScore |", "D", "| Term | Definition | Synonyms / Aliases |\n| --- | --- | --- |\n| Debt-Service-Coverage Ratio\n(DSCR) | Net operating income ÷ total property debt service. For rental/investor loans a\nDSCR $\\geq 1.0$ indicates the rent covers the payment. | Cash-Flow\nCoverage Ratio |\n| Documentation Level | Spectrum of required borrower docs (Full-Doc, Alt-Doc, Lite-Doc, No-Doc). | |\n| DTI (Debt-to-Income) Ratio | Total monthly debt payments ÷ gross monthly income. Non-QM allows higher\nDTIs (e.g., 55 \\%). | |", "| Term | Definition | Synonyms / Aliases |\n| --- | --- | --- |\n| Exit Strategy | How a short-term bridge or fix-and-flip loan will be repaid or refinanced. | |\n| Extension Fee | Charge to prolong a short-term (bridge) loan past its original maturity. | |", "# F", "| Term | Definition | Synonyms / Aliases |\n| --- | --- | --- |\n| Foreign National\nLoan | Mortgage to a non-U.S. citizen who resides abroad; qualifies on foreign\ncredit/income or asset-depletion. | ITIN Loan (if borrower has Individual\nTaxpayer ID) |\n| Full-Doc | Standard underwriting with tax returns, W-2s/1099s, pay stubs. Opposite of\nAlt-Doc. | |", "G", "| Term | Definition | Synonyms / Aliases |\n| --- | --- | --- |\n| Gift of Equity | Seller (often family) gives part of home equity toward buyer's down payment; allowed under\nsome Non-QM guidelines. | |\n| Guideline\nMatrix | Table showing max LTV/FICO/DTI tiers for a given product. | Rate Sheet, Eligibility\nGrid |", "H", "| Term | Definition | Synonyms /\nAliases |\n| --- | --- | --- |\n| HCLTV (High-Credit\nLoan-to-Value) | LTV calculation that factors in the credit limit of a HELOC, not just the current\ndraw. | |\n| Hard Money Loan | Asset-based short-term loan (12-24 mo) often used for rehab or bridge\nfinancing. 
| Private Money |", "I", "| Term | Definition | Synonyms /\nAliases |\n| --- | --- | --- |\n| Interest-Only\n(IO) | Payment structure where borrower pays only interest for a set period (e.g., 10 yrs), after which\namortization begins or balloon payment is due. | |\n| ITIN Borrower | Individual with an IRS Individual Taxpayer Identification Number (not SSN). Often qualifies under\nForeign National or Alt-Doc programs. | |", "J", "| Term | Definition | Synonyms /\nAliases |\n| --- | --- | --- |\n| Jumbo (Non-Agency)\nLoan | Loan amount above conforming limits, not sold to Fannie/Freddie. May be QM or\nNon-QM. | |", "K", "| Term | Definition | Synonyms / Aliases |\n| --- | --- | --- |\n| Key Rate Adjustment | Rate bump applied when certain credit factors fall outside matrix tiers (e.g., recent BK). | |", "L", "| Term | Definition | Synonyms /\nAliases |\n| --- | --- | --- |\n| Lender Paid Compensation\n(LPC) | Broker comp paid by the lender via higher rate/YSP. | |\n| Loan Program | Defined set of eligibility \\& pricing rules (e.g., \"12-Month Bank Statement, 90 \\% LTV,\nNo MI\"). | Product, Shelf |\n| Loan-to-Cost (LTC) | For rehab/ground-up builds: loan amount ÷ total project cost. | |\n| LTV (Loan-to-Value) | First lien amount ÷ appraised value. | |", "M", "| Term | Definition | Synonyms / Aliases |\n| --- | --- | --- |\n| Margin (ARM) | Fixed spread added to index rate on adjustable loans. | |\n| Minimum DSCR | Lowest acceptable DSCR for an investor loan (often 0.75-1.00). | |", "N", "| Term | Definition | Synonyms / Aliases |\n| --- | --- | --- |\n| Non-QM (Non-Qualified\nMortgage) | Any mortgage that fails at least one of the CFPB's QM safe-harbor tests (e.g., 43 \\% DTI,\npoints \\& fees, APOR threshold, doc type). Still must meet ATR. | |\n| No-Ratio Loan | Underwriting disregards borrower's DTI; focuses on collateral or assets. 
| NINA (No Income,\nNo Asset) |", "0", "| Term | Definition | Synonyms /\nAliases |\n| --- | --- | --- |\n| Originator Compensation\nRule | CFPB rule limiting how brokers are paid (no steering based on comp, no dual comp).\nApplies equally to Non-QM. | |\n| Occupancy Types | Primary Residence, Second Home, Non-Owner-Occupied (Investor). Eligibility \\& pricing\nvary widely in Non-QM. | |", "P", "| Term | Definition | Synonyms /\nAliases |\n| --- | --- | --- |\n| | | |", "| Points \\& Fees Cap | QM loans limited to $3 \\%$ of loan amount; Non-QM has no cap but high points affect pricing \\& demand. | |\n| :--: | :--: | :--: |\n| Prepayment Penalty | Fee for paying off loan early. Common in investor DSCR loans (e.g., 3-2-1 step-down). | PPP |\n| Profit-and-Loss (P\\&L) Statement Loan | Alternate doc type where CPA-prepared P\\&L (with or without statements) substantiates income. | |", "Q", "| Term | Definition | Synonyms / <br> Aliases |\n| :-- | :-- | :-- |\n| QM (Qualified <br> Mortgage) | Loan meeting CFPB safe-harbor criteria (points/fees, APOR, doc type, DTI or price-based <br> test). Opposite of Non-QM. | |", "R", "| Term | Definition | Synonyms / Aliases |\n| :-- | :-- | :-- |\n| Rate Buy-Down | Paying points at closing to secure lower note rate; thresholds differ in Non-QM pricing engines. | |\n| Reserves | Liquid assets required post-closing, expressed in months of PITIA. Non-QM often requires 6-12 mo. | |", "S", "| Term | Definition | Synonyms / <br> Aliases |\n| :-- | :-- | :-- |\n| Scratch-and-Dent <br> Loan | Previously funded loan with documentation/servicing defects that render it unsaleable to <br> agencies-often securitized in Non-QM pools. | $S-\\&-D$ |\n| Seasoning (Title) | Time elapsed since acquisition or cash-out refinance. Affects max LTV for flips. | |\n| Self-Employed <br> Borrower | $\\geq 25 \\%$ ownership in business; usually underwrites via Bank-Statement or Full-Doc 2-yr returns. 
| |", "T", "| Term | Definition | Synonyms / <br> Aliases |\n| :-- | :-- | :-- |\n| Twelve-Month Bank Statement <br> Program | Counts average monthly deposits over 12 months as qualifying income. | 12-Mo BS |\n| TILA-RESPA Integrated Disclosure <br> (TRID) | CFPB rule dictating Loan Estimate (LE) \\& Closing Disclosure (CD) timing. Applies <br> equally to Non-QM. | |", "U", "| Term | Definition | Synonyms / <br> Aliases |\n| :-- | :-- | :-- |\n| Underwriting Flexibility | Degree to which a lender will grant exceptions to stated guidelines (e.g., manual ATR <br> calculation, compensating factors). <br> (c) Copyright 2020 MyScaler - NetTantra Technologies. All rights reserved. | |", "UWM (Ultimate Weighted Margin)", "Proprietary pricing metric some aggregators use for Non-QM bulk bids.", "V", "| Term | Definition | Synonyms /\nAliases |\n| --- | --- | --- |\n| Verification of Employment\n(VOE)-Only Loan | Uses independent VOE to document income instead of pay stubs/tax\nreturns. | VOE Program |\n| VOI / VOA | Verification of Income / Verification of Assets via automated services (e.g.,\nPlaid, Finicity). | |", "# W", "| Term | Definition | Synonyms /\nAliases |\n| --- | --- | --- |\n| Written Explanation Letter\n(LOE) | Borrower letter clarifying derogatory credit or cash-flow anomalies; often requested in\nNon-QM. | Letter of\nExplanation |\n| Wholesale Lender | Lender funding loans through third-party mortgage brokers. Dominant distribution\nchannel for Non-QM. | |", "X, Y, Z", "| Term | Definition | Synonyms /\nAliases |\n| --- | --- | --- |\n| Yield-Spread Premium\n(YSP) | Extra rate margin paid to broker when lender covers their comp (see LPC). | |\n| Zero-Prepay | Non-QM loan structure with no prepayment penalty—rare for DSCR but used in\nowner-occupied products. 
| |", "## Comments", "1 Dhruv Ratra (Polaris24) @dhruv.ooIaris24 $\\cdot 2$ weeks ago Abbreviation Quick Reference ATR $\\cdot$ AVM $\\cdot$ BS $\\cdot$ CLTV $\\cdot$ DSCR $\\cdot$ DTI $\\cdot$ FICO $\\cdot$ HCLTV $\\cdot$ IO $\\cdot$ ITIN $\\cdot$ LTV $\\cdot$ LTC $\\cdot$ LPC $\\cdot$ MSA $\\cdot$ NINA $\\cdot$ QM $\\cdot$ PPP $\\cdot$ TRID $\\cdot$ VOE $\\cdot$ VOI/VOA $\\cdot$ YSP", "2 Dhruv Ratra (Polaris24) @dhruv.ooIaris24 $\\cdot 2$ weeks ago", "## Example Scenarios", "1. Bank-Statement Loan — Income Calculation", "| Detail | Input |\n| --- | --- |\n| Borrower | Self-employed graphic-design studio owner |\n| Program | 12-Month Personal Bank-Statement |\n| Deposits | $\\$ 18,000$ average monthly credits |\n| Expense Factor | $50 \\%$ (per lender matrix) |", "Qualifying Income: $\\$ 18,000 \\times(1-50 \\%)=\\$ 9,000 / \\mathrm{mo}$", "Traditional tax returns showed only $\\$ 45 \\mathrm{k}$ AGI-insufficient. Under the Non-QM program, the borrower now meets the ATR test for a $\\$ 600 \\mathrm{k}$ purchase at $90 \\%$ LTV. 2. DSCR Rental Loan", "| Detail | Input |\n| --- | --- |\n| Monthly Gross Rent | $\\$ 2,500$ |\n| Property Taxes \\& Insurance | $\\$ 300$ |\n| Mortgage P\\&I (interest-only, year 1) | $\\$ 1,800$ |", "DSCR: $\\$ 2,500 /(\\$ 1,800+\\$ 300)=1.19$ Lender's minimum DSCR is 1.00 , so the investor qualifies even without W-2 income. 3. Asset-Qualification / Asset-Depletion / Asset-Utilization", "| Detail | Input |\n| --- | --- |\n| Liquid Assets | $\\$ 1,200,000$ |\n| Amortization Term Assumed | 60 months |\n| Asset Utilization Factor | $100 \\%$ (no haircut) |", "Imputed Monthly Income: $\\$ 1,200,000 \\div 60=\\$ 20,000 / \\mathrm{mo}$ Retired borrower with minimal pension now evidences enough \"income\" to pass ATR for a $\\$ 900 \\mathrm{k}$ cash-out refi. 4. 
Credit-Event Seasoning", "| Detail | Input |\n| --- | --- |\n| Chapter 7 Bankruptcy Discharge | 18 months ago |\n| Lender Matrix | $\\geq 12 \\mathrm{mo}=\\mathrm{OK}$ to $80 \\% \\mathrm{LTV} ; \\geq 24 \\mathrm{mo}=\\mathrm{OK}$ to $90 \\% \\mathrm{LTV}$ |", "# Outcome:", "At 18 months seasoning, the borrower can obtain up to $80 \\%$ LTV but not $90 \\%$. Waiting six more months would open higher-LTV tiers. 5. Prepayment Penalty - 3-2-1 Step-Down", "| Year Paid Off | Penalty Calculation |\n| --- | --- |\n| Year 1 | $3 \\%$ of outstanding principal |\n| Year 2 | $2 \\%$ of outstanding principal |\n| Year 3 | $1 \\%$ of outstanding principal |\n| Year 4+ | $0 \\%$ |", "If the investor prepays $\\$ 400 \\mathrm{k}$ principal in Year 2, penalty $=\\$ 400 \\mathrm{k} \\times 2 \\%=\\$ 8,000$. 6. Interest-Only (IO) Structure", "| Detail | Input |\n| --- | --- |\n| Loan Amount | $\\$ 700 \\mathrm{k}$ |\n| IO Term | 10 years (then 20-yr amortization) |\n| Note Rate | $7.25 \\%$ |", "Year 1 Payment: Interest-only $=\\$ 700,000 \\times 7.25 \\% / 12=\\$ 4,229$ Year 11 Payment (amortized): Factor for 20-yr @ $7.25 \\% \\approx \\$ 7.90$ per $\\$ 1 \\mathrm{k} \\rightarrow \\$ 7.90 \\times 700=\\$ 5,530$ Borrower's payment jumps about $\\$ 1,300$ when amortization kicks in-disclosed via TRID. 7. Foreign National Purchase", "| Detail | Input |\n| --- | --- |\n| Borrower | Canadian citizen with no U.S. credit |\n| Program | Non-Owner-Occupled DSCR (min DSCR = 1.0) |\n| Down Payment | $25 \\%$ |\n| Documentation | Passport, Canadian bureau report, CPA letter verifying income, 12 mo reserves |", "As long as property cash flow covers debt service (DSCR $\\geq 1.0$ ), the borrower qualifies despite zero U.S. FICO. 8. 
High-DTI Compensating-Factor Exception", "| Detail | Input |\n| --- | --- |\n| Calculated DTI | $54 \\%$ (lender matrix max $=49 \\%$ ) |\n| Compensating Factors | DSCR 1.25, 12 mo reserves, 780 FICO, $50 \\%$ LTV |", "Underwriting manager grants manual exception based on strong compensating factors-classic Non-QM flexibility.", "These examples demonstrate how Non-QM programs bend traditional rules-yet remain measurable and risk-managed-allowing borrowers and investors otherwise locked out of agency lending to secure financing."]
|
glossary.index
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fe193b3717c0d5f8c23f4bb223542c873c9d573fe532c6bcc02b69a67775855
|
| 3 |
+
size 460845
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
faiss-cpu
|
| 3 |
+
openai
|
| 4 |
+
python-dotenv
|
| 5 |
+
requests
|
| 6 |
+
rapidfuzz # optional spelling helper
|
| 7 |
+
numpy
|