Spaces:
Sleeping
Sleeping
Jackie Makhija committed on
Commit ·
a6a16a2
1
Parent(s): 0cc7766
Fix deployment: correct Dockerfile, add missing dependencies, include all required files
Browse files- .gitignore +63 -0
- DEPLOYMENT_GUIDE.md +755 -0
- Dockerfile +4 -6
- HF_DEPLOYMENT.md +153 -0
- HF_README.md +243 -0
- app.py +34 -19
- conftest.py +95 -0
- requirements.txt +1 -1
.gitignore
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment variables
|
| 2 |
+
.env
|
| 3 |
+
.env.local
|
| 4 |
+
.env.*.local
|
| 5 |
+
|
| 6 |
+
# Python
|
| 7 |
+
__pycache__/
|
| 8 |
+
*.py[cod]
|
| 9 |
+
*$py.class
|
| 10 |
+
*.so
|
| 11 |
+
.Python
|
| 12 |
+
build/
|
| 13 |
+
develop-eggs/
|
| 14 |
+
dist/
|
| 15 |
+
downloads/
|
| 16 |
+
eggs/
|
| 17 |
+
.eggs/
|
| 18 |
+
lib/
|
| 19 |
+
lib64/
|
| 20 |
+
parts/
|
| 21 |
+
sdist/
|
| 22 |
+
var/
|
| 23 |
+
wheels/
|
| 24 |
+
pip-wheel-metadata/
|
| 25 |
+
share/python-wheels/
|
| 26 |
+
*.egg-info/
|
| 27 |
+
.installed.cfg
|
| 28 |
+
*.egg
|
| 29 |
+
Pipfile.lock
|
| 30 |
+
|
| 31 |
+
# Virtual environments
|
| 32 |
+
venv/
|
| 33 |
+
ENV/
|
| 34 |
+
env/
|
| 35 |
+
.venv
|
| 36 |
+
|
| 37 |
+
# IDE
|
| 38 |
+
.vscode/
|
| 39 |
+
.idea/
|
| 40 |
+
*.swp
|
| 41 |
+
*.swo
|
| 42 |
+
*~
|
| 43 |
+
.DS_Store
|
| 44 |
+
|
| 45 |
+
# Testing
|
| 46 |
+
.pytest_cache/
|
| 47 |
+
.coverage
|
| 48 |
+
htmlcov/
|
| 49 |
+
.tox/
|
| 50 |
+
|
| 51 |
+
# Logs
|
| 52 |
+
logs/
|
| 53 |
+
*.log
|
| 54 |
+
|
| 55 |
+
# Cache
|
| 56 |
+
.cache/
|
| 57 |
+
.mypy_cache/
|
| 58 |
+
.dmypy.json
|
| 59 |
+
dmypy.json
|
| 60 |
+
|
| 61 |
+
# Misc
|
| 62 |
+
*.bak
|
| 63 |
+
*.tmp
|
DEPLOYMENT_GUIDE.md
ADDED
|
@@ -0,0 +1,755 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# UnityCatalog-ChatBot: Step-by-Step Deployment Guide
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
This guide covers deploying the Unity Catalog ChatBot locally, in Docker, and to cloud platforms (AWS ECS, Kubernetes, Azure Container Instances, and Hugging Face Spaces).
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Part 1: Local Development Setup
|
| 9 |
+
|
| 10 |
+
### Prerequisites
|
| 11 |
+
- **Python 3.9+** (3.11+ recommended)
|
| 12 |
+
- **pip** (Python package manager)
|
| 13 |
+
- **Git**
|
| 14 |
+
- **Databricks workspace** with:
|
| 15 |
+
- Host URL (e.g., `https://your-workspace.databricks.com`)
|
| 16 |
+
- Personal Access Token (PAT)
|
| 17 |
+
- Warehouse ID (optional, for SQL execution)
|
| 18 |
+
- **Anthropic API key** (Claude access)
|
| 19 |
+
|
| 20 |
+
### Step 1: Clone Repository
|
| 21 |
+
```bash
|
| 22 |
+
git clone <repository-url>
|
| 23 |
+
cd UnityCatalog-ChatBot
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
### Step 2: Create Python Virtual Environment
|
| 27 |
+
```bash
|
| 28 |
+
# Windows
|
| 29 |
+
python -m venv venv
|
| 30 |
+
venv\Scripts\activate
|
| 31 |
+
|
| 32 |
+
# macOS/Linux
|
| 33 |
+
python3 -m venv venv
|
| 34 |
+
source venv/bin/activate
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
### Step 3: Install Dependencies
|
| 38 |
+
```bash
|
| 39 |
+
pip install -r requirements.txt
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
### Step 4: Configure Environment Variables
|
| 43 |
+
Create a `.env` file in the project root:
|
| 44 |
+
```env
|
| 45 |
+
# Databricks Configuration
|
| 46 |
+
DATABRICKS_HOST=https://your-workspace.databricks.com
|
| 47 |
+
DATABRICKS_TOKEN=your-personal-access-token
|
| 48 |
+
DATABRICKS_WAREHOUSE_ID=your-warehouse-id # Optional
|
| 49 |
+
|
| 50 |
+
# Anthropic Configuration
|
| 51 |
+
ANTHROPIC_API_KEY=sk-ant-your-api-key
|
| 52 |
+
|
| 53 |
+
# Server Configuration (Optional)
|
| 54 |
+
SERVER_HOST=0.0.0.0
|
| 55 |
+
SERVER_PORT=5000
|
| 56 |
+
SERVER_WORKERS=4
|
| 57 |
+
FLASK_ENV=development
|
| 58 |
+
|
| 59 |
+
# Feature Flags (Optional)
|
| 60 |
+
ENABLE_AUTH=false
|
| 61 |
+
ENABLE_SQL_EXECUTION=false
|
| 62 |
+
LOG_LEVEL=INFO
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
**Important:** Never commit `.env` to version control. Add it to `.gitignore`:
|
| 66 |
+
```
|
| 67 |
+
.env
|
| 68 |
+
*.pyc
|
| 69 |
+
__pycache__/
|
| 70 |
+
venv/
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
### Step 5: Run Tests (Mock-Only, No Credentials Required)
|
| 74 |
+
```bash
|
| 75 |
+
# Run all tests
|
| 76 |
+
pytest test_chatbot.py -v
|
| 77 |
+
|
| 78 |
+
# Run specific test class
|
| 79 |
+
pytest test_chatbot.py::TestUnityCatalogService -v
|
| 80 |
+
|
| 81 |
+
# Run with coverage
|
| 82 |
+
pytest test_chatbot.py --cov=. --cov-report=html
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
Expected output:
|
| 86 |
+
```
|
| 87 |
+
======================== 23 passed in 0.52s =======================
|
| 88 |
+
```
|
| 89 |
+
|
| 90 |
+
### Step 6: Run Development Server
|
| 91 |
+
```bash
|
| 92 |
+
python app.py
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
Server starts on `http://localhost:5000`
|
| 96 |
+
|
| 97 |
+
### Step 7: Test API Endpoints
|
| 98 |
+
```bash
|
| 99 |
+
# Health check
|
| 100 |
+
curl http://localhost:5000/api/health
|
| 101 |
+
|
| 102 |
+
# List catalogs
|
| 103 |
+
curl http://localhost:5000/api/catalogs
|
| 104 |
+
|
| 105 |
+
# Chat endpoint
|
| 106 |
+
curl -X POST http://localhost:5000/api/chat \
|
| 107 |
+
-H "Content-Type: application/json" \
|
| 108 |
+
-d '{"message": "Create a catalog named sales_data"}'
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
---
|
| 112 |
+
|
| 113 |
+
## Part 2: Docker Deployment
|
| 114 |
+
|
| 115 |
+
### Step 1: Build Docker Image
|
| 116 |
+
```bash
|
| 117 |
+
docker build -t unitycatalog-chatbot:latest .
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
### Step 2: Run Container (Development)
|
| 121 |
+
```bash
|
| 122 |
+
docker run -p 5000:5000 \
|
| 123 |
+
-e DATABRICKS_HOST="https://your-workspace.databricks.com" \
|
| 124 |
+
-e DATABRICKS_TOKEN="your-token" \
|
| 125 |
+
-e ANTHROPIC_API_KEY="sk-ant-your-key" \
|
| 126 |
+
unitycatalog-chatbot:latest
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
### Step 3: Run with Docker Compose
|
| 130 |
+
```bash
|
| 131 |
+
docker-compose up -d
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
Check logs:
|
| 135 |
+
```bash
|
| 136 |
+
docker-compose logs -f app
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
Stop containers:
|
| 140 |
+
```bash
|
| 141 |
+
docker-compose down
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
### Step 4: Verify Container Health
|
| 145 |
+
```bash
|
| 146 |
+
curl http://localhost:5000/api/health
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
---
|
| 150 |
+
|
| 151 |
+
## Part 3: Cloud Deployment
|
| 152 |
+
|
| 153 |
+
### AWS ECS (Elastic Container Service)
|
| 154 |
+
|
| 155 |
+
#### Prerequisites
|
| 156 |
+
- AWS account with ECS access
|
| 157 |
+
- ECR (Elastic Container Registry) repository created
|
| 158 |
+
- IAM role for ECS task execution
|
| 159 |
+
|
| 160 |
+
#### Steps
|
| 161 |
+
1. **Push image to ECR:**
|
| 162 |
+
```bash
|
| 163 |
+
# Get login token
|
| 164 |
+
aws ecr get-login-password --region us-east-1 | \
|
| 165 |
+
docker login --username AWS --password-stdin \
|
| 166 |
+
YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com
|
| 167 |
+
|
| 168 |
+
# Tag image
|
| 169 |
+
docker tag unitycatalog-chatbot:latest \
|
| 170 |
+
YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/unitycatalog-chatbot:latest
|
| 171 |
+
|
| 172 |
+
# Push to ECR
|
| 173 |
+
docker push YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/unitycatalog-chatbot:latest
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
2. **Create ECS Task Definition** (JSON):
|
| 177 |
+
```json
|
| 178 |
+
{
|
| 179 |
+
"family": "unitycatalog-chatbot",
|
| 180 |
+
"containerDefinitions": [
|
| 181 |
+
{
|
| 182 |
+
"name": "app",
|
| 183 |
+
"image": "YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/unitycatalog-chatbot:latest",
|
| 184 |
+
"portMappings": [{"containerPort": 5000, "hostPort": 5000}],
|
| 185 |
+
"environment": [
|
| 186 |
+
{"name": "DATABRICKS_HOST", "value": "https://your-workspace.databricks.com"},
|
| 187 |
+
{"name": "SERVER_HOST", "value": "0.0.0.0"},
|
| 188 |
+
{"name": "SERVER_PORT", "value": "5000"}
|
| 189 |
+
],
|
| 190 |
+
"secrets": [
|
| 191 |
+
{"name": "DATABRICKS_TOKEN", "valueFrom": "arn:aws:secretsmanager:..."},
|
| 192 |
+
{"name": "ANTHROPIC_API_KEY", "valueFrom": "arn:aws:secretsmanager:..."}
|
| 193 |
+
],
|
| 194 |
+
"logConfiguration": {
|
| 195 |
+
"logDriver": "awslogs",
|
| 196 |
+
"options": {
|
| 197 |
+
"awslogs-group": "/ecs/unitycatalog-chatbot",
|
| 198 |
+
"awslogs-region": "us-east-1",
|
| 199 |
+
"awslogs-stream-prefix": "ecs"
|
| 200 |
+
}
|
| 201 |
+
}
|
| 202 |
+
}
|
| 203 |
+
],
|
| 204 |
+
"requiresCompatibilities": ["FARGATE"],
|
| 205 |
+
"networkMode": "awsvpc",
|
| 206 |
+
"cpu": "256",
|
| 207 |
+
"memory": "512"
|
| 208 |
+
}
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
3. **Create ECS Service** using AWS Console or CLI:
|
| 212 |
+
```bash
|
| 213 |
+
aws ecs create-service \
|
| 214 |
+
--cluster my-cluster \
|
| 215 |
+
--service-name unitycatalog-chatbot \
|
| 216 |
+
--task-definition unitycatalog-chatbot:1 \
|
| 217 |
+
--desired-count 2 \
|
| 218 |
+
--launch-type FARGATE \
|
| 219 |
+
--network-configuration "awsvpcConfiguration={subnets=[subnet-xxx],securityGroups=[sg-xxx]}"
|
| 220 |
+
```
|
| 221 |
+
|
| 222 |
+
4. **Monitor service:**
|
| 223 |
+
```bash
|
| 224 |
+
aws ecs describe-services \
|
| 225 |
+
--cluster my-cluster \
|
| 226 |
+
--services unitycatalog-chatbot
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
### Kubernetes Deployment
|
| 230 |
+
|
| 231 |
+
#### Prerequisites
|
| 232 |
+
- Kubernetes cluster (EKS, AKS, GKE, or local)
|
| 233 |
+
- `kubectl` CLI configured
|
| 234 |
+
- Docker image pushed to container registry
|
| 235 |
+
|
| 236 |
+
#### Steps
|
| 237 |
+
|
| 238 |
+
1. **Create Kubernetes Secrets:**
|
| 239 |
+
```bash
|
| 240 |
+
kubectl create secret generic unitycatalog-secrets \
|
| 241 |
+
--from-literal=DATABRICKS_TOKEN='your-token' \
|
| 242 |
+
--from-literal=ANTHROPIC_API_KEY='sk-ant-your-key'
|
| 243 |
+
```
|
| 244 |
+
|
| 245 |
+
2. **Create Deployment (deployment.yaml):**
|
| 246 |
+
```yaml
|
| 247 |
+
apiVersion: apps/v1
|
| 248 |
+
kind: Deployment
|
| 249 |
+
metadata:
|
| 250 |
+
name: unitycatalog-chatbot
|
| 251 |
+
labels:
|
| 252 |
+
app: unitycatalog-chatbot
|
| 253 |
+
spec:
|
| 254 |
+
replicas: 3
|
| 255 |
+
selector:
|
| 256 |
+
matchLabels:
|
| 257 |
+
app: unitycatalog-chatbot
|
| 258 |
+
template:
|
| 259 |
+
metadata:
|
| 260 |
+
labels:
|
| 261 |
+
app: unitycatalog-chatbot
|
| 262 |
+
spec:
|
| 263 |
+
containers:
|
| 264 |
+
- name: app
|
| 265 |
+
image: your-registry/unitycatalog-chatbot:latest
|
| 266 |
+
ports:
|
| 267 |
+
- containerPort: 5000
|
| 268 |
+
env:
|
| 269 |
+
- name: DATABRICKS_HOST
|
| 270 |
+
value: "https://your-workspace.databricks.com"
|
| 271 |
+
- name: SERVER_HOST
|
| 272 |
+
value: "0.0.0.0"
|
| 273 |
+
- name: SERVER_PORT
|
| 274 |
+
value: "5000"
|
| 275 |
+
envFrom:
|
| 276 |
+
- secretRef:
|
| 277 |
+
name: unitycatalog-secrets
|
| 278 |
+
livenessProbe:
|
| 279 |
+
httpGet:
|
| 280 |
+
path: /api/health
|
| 281 |
+
port: 5000
|
| 282 |
+
initialDelaySeconds: 30
|
| 283 |
+
periodSeconds: 10
|
| 284 |
+
readinessProbe:
|
| 285 |
+
httpGet:
|
| 286 |
+
path: /api/health
|
| 287 |
+
port: 5000
|
| 288 |
+
initialDelaySeconds: 10
|
| 289 |
+
periodSeconds: 5
|
| 290 |
+
resources:
|
| 291 |
+
requests:
|
| 292 |
+
memory: "256Mi"
|
| 293 |
+
cpu: "250m"
|
| 294 |
+
limits:
|
| 295 |
+
memory: "512Mi"
|
| 296 |
+
cpu: "500m"
|
| 297 |
+
---
|
| 298 |
+
apiVersion: v1
|
| 299 |
+
kind: Service
|
| 300 |
+
metadata:
|
| 301 |
+
name: unitycatalog-chatbot-service
|
| 302 |
+
spec:
|
| 303 |
+
type: LoadBalancer
|
| 304 |
+
ports:
|
| 305 |
+
- port: 80
|
| 306 |
+
targetPort: 5000
|
| 307 |
+
selector:
|
| 308 |
+
app: unitycatalog-chatbot
|
| 309 |
+
```
|
| 310 |
+
|
| 311 |
+
3. **Deploy to Kubernetes:**
|
| 312 |
+
```bash
|
| 313 |
+
kubectl apply -f deployment.yaml
|
| 314 |
+
```
|
| 315 |
+
|
| 316 |
+
4. **Monitor deployment:**
|
| 317 |
+
```bash
|
| 318 |
+
# Check status
|
| 319 |
+
kubectl get pods -l app=unitycatalog-chatbot
|
| 320 |
+
|
| 321 |
+
# Check logs
|
| 322 |
+
kubectl logs -f deployment/unitycatalog-chatbot
|
| 323 |
+
|
| 324 |
+
# Get service endpoint
|
| 325 |
+
kubectl get service unitycatalog-chatbot-service
|
| 326 |
+
```
|
| 327 |
+
|
| 328 |
+
### Azure Container Instances (ACI)
|
| 329 |
+
|
| 330 |
+
1. **Push image to Azure Container Registry:**
|
| 331 |
+
```bash
|
| 332 |
+
az acr build --registry your-acr-name \
|
| 333 |
+
--image unitycatalog-chatbot:latest .
|
| 334 |
+
```
|
| 335 |
+
|
| 336 |
+
2. **Deploy to ACI:**
|
| 337 |
+
```bash
|
| 338 |
+
az container create \
|
| 339 |
+
--resource-group your-rg \
|
| 340 |
+
--name unitycatalog-chatbot \
|
| 341 |
+
--image your-acr-name.azurecr.io/unitycatalog-chatbot:latest \
|
| 342 |
+
--cpu 1 --memory 1 \
|
| 343 |
+
--ports 5000 \
|
| 344 |
+
--environment-variables \
|
| 345 |
+
DATABRICKS_HOST="https://your-workspace.databricks.com" \
|
| 346 |
+
SERVER_HOST="0.0.0.0" \
|
| 347 |
+
SERVER_PORT="5000" \
|
| 348 |
+
--secure-environment-variables \
|
| 349 |
+
DATABRICKS_TOKEN="your-token" \
|
| 350 |
+
ANTHROPIC_API_KEY="sk-ant-your-key"
|
| 351 |
+
```
|
| 352 |
+
|
| 353 |
+
### Hugging Face Spaces (Docker)
|
| 354 |
+
|
| 355 |
+
#### Prerequisites
|
| 356 |
+
- Hugging Face account (free at https://huggingface.co/join)
|
| 357 |
+
- Repository on Hugging Face Hub (create at https://huggingface.co/new)
|
| 358 |
+
|
| 359 |
+
#### Step 1: Create Hugging Face Repository
|
| 360 |
+
1. Go to https://huggingface.co/new
|
| 361 |
+
2. Enter repository name: `unitycatalog-chatbot`
|
| 362 |
+
3. Select **Space** type
|
| 363 |
+
4. Choose **Docker** runtime
|
| 364 |
+
5. Click **Create repository**
|
| 365 |
+
|
| 366 |
+
#### Step 2: Clone Space Repository
|
| 367 |
+
```bash
|
| 368 |
+
git clone https://huggingface.co/spaces/your-username/unitycatalog-chatbot
|
| 369 |
+
cd unitycatalog-chatbot
|
| 370 |
+
```
|
| 371 |
+
|
| 372 |
+
#### Step 3: Copy Project Files
|
| 373 |
+
```bash
|
| 374 |
+
# Copy source files
|
| 375 |
+
cp -r ../UnityCatalog-ChatBot/* .
|
| 376 |
+
|
| 377 |
+
# Ensure key files are present:
|
| 378 |
+
# - app.py
|
| 379 |
+
# - unity_catalog_service.py
|
| 380 |
+
# - config.py
|
| 381 |
+
# - requirements.txt
|
| 382 |
+
# - Dockerfile
|
| 383 |
+
# - test_chatbot.py (optional)
|
| 384 |
+
```
|
| 385 |
+
|
| 386 |
+
#### Step 4: Create `README.md` for Space
|
| 387 |
+
```markdown
|
| 388 |
+
---
|
| 389 |
+
title: Unity Catalog Chatbot
|
| 390 |
+
emoji: 💬
|
| 391 |
+
colorFrom: blue
|
| 392 |
+
colorTo: purple
|
| 393 |
+
sdk: docker
|
| 394 |
+
pinned: false
|
| 395 |
+
---
|
| 396 |
+
|
| 397 |
+
# Unity Catalog Chatbot
|
| 398 |
+
|
| 399 |
+
Natural language interface for Databricks Unity Catalog operations powered by Claude AI.
|
| 400 |
+
|
| 401 |
+
## Features
|
| 402 |
+
- Create catalogs, schemas, and tables via natural language
|
| 403 |
+
- Grant and revoke permissions
|
| 404 |
+
- List objects across Unity Catalog
|
| 405 |
+
- Execute SQL queries
|
| 406 |
+
|
| 407 |
+
## API Endpoints
|
| 408 |
+
- `GET /api/health` - Health check
|
| 409 |
+
- `POST /api/chat` - Chat with Claude to manage Unity Catalog
|
| 410 |
+
- `GET /api/catalogs` - List all catalogs
|
| 411 |
+
- `GET /api/schemas/<catalog>` - List schemas
|
| 412 |
+
- `GET /api/tables/<catalog>/<schema>` - List tables
|
| 413 |
+
|
| 414 |
+
## Environment Variables
|
| 415 |
+
Required:
|
| 416 |
+
- `DATABRICKS_HOST` - Databricks workspace URL
|
| 417 |
+
- `DATABRICKS_TOKEN` - Personal access token
|
| 418 |
+
- `ANTHROPIC_API_KEY` - Claude API key
|
| 419 |
+
|
| 420 |
+
Optional:
|
| 421 |
+
- `SERVER_PORT` - Port to run on (default: 5000)
|
| 422 |
+
- `LOG_LEVEL` - Logging level (default: INFO)
|
| 423 |
+
```
|
| 424 |
+
|
| 425 |
+
#### Step 5: Configure Credentials with Space Secrets
|
| 426 |
+
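Do not store credentials in a file such as `.streamlit/secrets.toml` in the repository: this is a Docker Space (not a Streamlit app), and any committed file is visible to everyone who can read the repo.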
|
| 427 |
+
|
| 428 |
+
**⚠️ IMPORTANT: Never commit secrets to git**
|
| 429 |
+
|
| 430 |
+
Instead, use Hugging Face Secrets management:
|
| 431 |
+
|
| 432 |
+
1. Go to your Space Settings → Secrets
|
| 433 |
+
2. Add secrets:
|
| 434 |
+
- `DATABRICKS_HOST` = `https://your-workspace.databricks.com`
|
| 435 |
+
- `DATABRICKS_TOKEN` = your token
|
| 436 |
+
- `ANTHROPIC_API_KEY` = your API key
|
| 437 |
+
|
| 438 |
+
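Non-sensitive settings (e.g., `SERVER_PORT`, `LOG_LEVEL`) can instead be added as Variables in the Space settings; keep tokens and API keys in Secrets, because Variables are publicly visible.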
|
| 439 |
+
|
| 440 |
+
#### Step 6: Update Dockerfile for Space (if needed)
|
| 441 |
+
```dockerfile
|
| 442 |
+
FROM python:3.11-slim
|
| 443 |
+
|
| 444 |
+
WORKDIR /app
|
| 445 |
+
|
| 446 |
+
COPY requirements.txt .
|
| 447 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 448 |
+
|
| 449 |
+
COPY . .
|
| 450 |
+
|
| 451 |
+
# Health check
|
| 452 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
| 453 |
+
CMD python -c "import requests; requests.get('http://localhost:5000/api/health')"
|
| 454 |
+
|
| 455 |
+
CMD ["python", "app.py"]
|
| 456 |
+
```
|
| 457 |
+
|
| 458 |
+
#### Step 7: Push to Hugging Face
|
| 459 |
+
```bash
|
| 460 |
+
# Configure git identity (authenticate with a Hugging Face access token when prompted on push)
|
| 461 |
+
git config user.name "Your Name"
|
| 462 |
+
git config user.email "your-email@example.com"
|
| 463 |
+
|
| 464 |
+
# Add files
|
| 465 |
+
git add .
|
| 466 |
+
|
| 467 |
+
# Commit
|
| 468 |
+
git commit -m "Initial deployment"
|
| 469 |
+
|
| 470 |
+
# Push (triggers auto-build and deployment)
|
| 471 |
+
git push
|
| 472 |
+
```
|
| 473 |
+
|
| 474 |
+
**Space will automatically:**
|
| 475 |
+
- Build Docker image
|
| 476 |
+
- Deploy to Hugging Face infrastructure
|
| 477 |
+
- Provide public URL: `https://huggingface.co/spaces/your-username/unitycatalog-chatbot`
|
| 478 |
+
|
| 479 |
+
#### Step 8: Verify Deployment
|
| 480 |
+
```bash
|
| 481 |
+
# Once Space is running, test the endpoint:
|
| 482 |
+
curl https://your-username-unitycatalog-chatbot.hf.space/api/health
|
| 483 |
+
|
| 484 |
+
# Chat endpoint
|
| 485 |
+
curl -X POST https://your-username-unitycatalog-chatbot.hf.space/api/chat \
|
| 486 |
+
-H "Content-Type: application/json" \
|
| 487 |
+
-d '{"message": "List all catalogs"}'
|
| 488 |
+
```
|
| 489 |
+
|
| 490 |
+
#### Step 9: Set Secrets in Hugging Face UI
|
| 491 |
+
1. Go to Space → Settings → Secrets
|
| 492 |
+
2. Add environment variables:
|
| 493 |
+
- `DATABRICKS_HOST`
|
| 494 |
+
- `DATABRICKS_TOKEN`
|
| 495 |
+
- `ANTHROPIC_API_KEY`
|
| 496 |
+
|
| 497 |
+
3. Space will restart automatically with secrets loaded
|
| 498 |
+
|
| 499 |
+
#### Troubleshooting Hugging Face Spaces
|
| 500 |
+
|
| 501 |
+
**Issue: "Build failed"**
|
| 502 |
+
- Check **Build logs** in Space settings
|
| 503 |
+
- Ensure `Dockerfile` is present
|
| 504 |
+
- Verify `requirements.txt` has all dependencies
|
| 505 |
+
|
| 506 |
+
**Issue: "Application won't start"**
|
| 507 |
+
- Check **Runtime logs** in Space
|
| 508 |
+
- Verify environment variables are set in Secrets
|
| 509 |
+
- Test locally: `docker build -t test . && docker run -p 5000:5000 test`
|
| 510 |
+
|
| 511 |
+
**Issue: "Port already in use"**
|
| 512 |
+
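- Hugging Face Docker Spaces route traffic to port 7860 by default; add `app_port: 5000` to the YAML front matter of the Space `README.md` (or make the app listen on 7860)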
|
| 513 |
+
- Ensure `app.py` uses environment variable for port:
|
| 514 |
+
```python
|
| 515 |
+
if __name__ == '__main__':
|
| 516 |
+
port = int(os.getenv("SERVER_PORT", 5000))
|
| 517 |
+
app.run(host='0.0.0.0', port=port)
|
| 518 |
+
```
|
| 519 |
+
|
| 520 |
+
**Issue: "API calls timeout"**
|
| 521 |
+
- Databricks/Anthropic credentials invalid
|
| 522 |
+
- Network connectivity issue
|
| 523 |
+
- Test locally first with real credentials
|
| 524 |
+
|
| 525 |
+
#### Hugging Face Space Features
|
| 526 |
+
|
| 527 |
+
- **Public URL:** `https://huggingface.co/spaces/your-username/unitycatalog-chatbot`
|
| 528 |
+
- **Auto-scaling:** Handles traffic spikes
|
| 529 |
+
- **Free tier:** Up to 2 CPU cores (enough for light use)
|
| 530 |
+
- **Storage:** `/tmp` directory is available but ephemeral (contents are lost when the Space restarts)
|
| 531 |
+
- **Custom domain:** Upgrade to pro for custom domains
|
| 532 |
+
|
| 533 |
+
#### Sharing Your Space
|
| 534 |
+
|
| 535 |
+
1. Go to Space page
|
| 536 |
+
2. Click **Share** button
|
| 537 |
+
3. Copy shareable link or embed code:
|
| 538 |
+
```html
|
| 539 |
+
<iframe
|
| 540 |
+
src="https://huggingface.co/spaces/your-username/unitycatalog-chatbot?embed=true"
|
| 541 |
+
frameborder="0"
|
| 542 |
+
width="800"
|
| 543 |
+
height="600"
|
| 544 |
+
></iframe>
|
| 545 |
+
```
|
| 546 |
+
|
| 547 |
+
---
|
| 548 |
+
|
| 549 |
+
## Part 4: Production Configuration
|
| 550 |
+
|
| 551 |
+
### Security Best Practices
|
| 552 |
+
|
| 553 |
+
1. **Enable Authentication:**
|
| 554 |
+
```env
|
| 555 |
+
ENABLE_AUTH=true
|
| 556 |
+
API_KEY_HEADER=X-API-Key
|
| 557 |
+
```
|
| 558 |
+
|
| 559 |
+
Add API key header to requests:
|
| 560 |
+
```bash
|
| 561 |
+
curl -H "X-API-Key: your-api-key" http://localhost:5000/api/health
|
| 562 |
+
```
|
| 563 |
+
|
| 564 |
+
2. **Rate Limiting:**
|
| 565 |
+
```env
|
| 566 |
+
RATE_LIMIT_PER_MINUTE=60
|
| 567 |
+
```
|
| 568 |
+
|
| 569 |
+
3. **HTTPS/TLS:**
|
| 570 |
+
Use reverse proxy (Nginx, HAProxy) to terminate TLS:
|
| 571 |
+
```nginx
|
| 572 |
+
server {
|
| 573 |
+
listen 443 ssl;
|
| 574 |
+
server_name your-domain.com;
|
| 575 |
+
|
| 576 |
+
ssl_certificate /path/to/cert.pem;
|
| 577 |
+
ssl_certificate_key /path/to/key.pem;
|
| 578 |
+
|
| 579 |
+
location / {
|
| 580 |
+
proxy_pass http://localhost:5000;
|
| 581 |
+
proxy_set_header X-Forwarded-For $remote_addr;
|
| 582 |
+
}
|
| 583 |
+
}
|
| 584 |
+
```
|
| 585 |
+
|
| 586 |
+
4. **Environment Variables:**
|
| 587 |
+
Use secret management (AWS Secrets Manager, Azure Key Vault, HashiCorp Vault):
|
| 588 |
+
```bash
|
| 589 |
+
# AWS
|
| 590 |
+
aws secretsmanager get-secret-value --secret-id unitycatalog-secrets
|
| 591 |
+
|
| 592 |
+
# Azure
|
| 593 |
+
az keyvault secret show --vault-name your-vault --name DATABRICKS_TOKEN
|
| 594 |
+
```
|
| 595 |
+
|
| 596 |
+
### Logging & Monitoring
|
| 597 |
+
|
| 598 |
+
1. **Enable comprehensive logging:**
|
| 599 |
+
```env
|
| 600 |
+
LOG_LEVEL=INFO
|
| 601 |
+
LOG_TO_FILE=true
|
| 602 |
+
LOG_FILE_PATH=/var/log/chatbot.log
|
| 603 |
+
```
|
| 604 |
+
|
| 605 |
+
2. **Application Insights / DataDog / CloudWatch:**
|
| 606 |
+
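Logs written to stdout/stderr are automatically captured by container orchestration platforms; logs written only to a file (`LOG_TO_FILE=true`) must be shipped separately.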
|
| 607 |
+
|
| 608 |
+
### Performance Tuning
|
| 609 |
+
|
| 610 |
+
1. **Gunicorn workers** (production):
|
| 611 |
+
```bash
|
| 612 |
+
gunicorn --workers 4 --bind 0.0.0.0:5000 app:app
|
| 613 |
+
```
|
| 614 |
+
|
| 615 |
+
2. **Caching:**
|
| 616 |
+
```env
|
| 617 |
+
ENABLE_CACHING=true
|
| 618 |
+
CACHE_TTL=300
|
| 619 |
+
```
|
| 620 |
+
|
| 621 |
+
---
|
| 622 |
+
|
| 623 |
+
## Part 5: Health Checks & Validation
|
| 624 |
+
|
| 625 |
+
### Pre-Deployment Checklist
|
| 626 |
+
|
| 627 |
+
- [ ] All tests pass: `pytest test_chatbot.py -v`
|
| 628 |
+
- [ ] `.env` file configured with valid credentials
|
| 629 |
+
- [ ] Docker image builds successfully
|
| 630 |
+
- [ ] Health endpoint responds: `curl http://localhost:5000/api/health`
|
| 631 |
+
- [ ] Sample requests succeed (catalog listing, chat)
|
| 632 |
+
- [ ] Logs show no errors
|
| 633 |
+
|
| 634 |
+
### Post-Deployment Validation
|
| 635 |
+
|
| 636 |
+
```bash
|
| 637 |
+
# Health check
|
| 638 |
+
curl https://your-api-endpoint/api/health
|
| 639 |
+
|
| 640 |
+
# Test chat endpoint
|
| 641 |
+
curl -X POST https://your-api-endpoint/api/chat \
|
| 642 |
+
-H "Content-Type: application/json" \
|
| 643 |
+
-H "X-API-Key: your-api-key" \
|
| 644 |
+
-d '{"message": "List all catalogs"}'
|
| 645 |
+
|
| 646 |
+
# Check logs
|
| 647 |
+
kubectl logs deployment/unitycatalog-chatbot # K8s
|
| 648 |
+
docker logs <container-id> # Docker
|
| 649 |
+
aws logs tail /ecs/unitycatalog-chatbot --follow # ECS
|
| 650 |
+
```
|
| 651 |
+
|
| 652 |
+
---
|
| 653 |
+
|
| 654 |
+
## Part 6: Troubleshooting
|
| 655 |
+
|
| 656 |
+
### Common Issues
|
| 657 |
+
|
| 658 |
+
**1. "Cannot configure default credentials"**
|
| 659 |
+
- Ensure `.env` file has valid `DATABRICKS_HOST` and `DATABRICKS_TOKEN`
|
| 660 |
+
- Verify token format (starts with `dapi`)
|
| 661 |
+
|
| 662 |
+
**2. "Invalid Anthropic API key"**
|
| 663 |
+
- Confirm key starts with `sk-ant-`
|
| 664 |
+
- Check key has not expired
|
| 665 |
+
|
| 666 |
+
**3. "Port 5000 already in use"**
|
| 667 |
+
```bash
|
| 668 |
+
# Kill process using port
|
| 669 |
+
lsof -ti:5000 | xargs kill -9 # macOS/Linux
|
| 670 |
+
netstat -ano | findstr :5000 # Windows: note the PID in the last column, then run: taskkill /PID <PID> /F
|
| 671 |
+
```
|
| 672 |
+
|
| 673 |
+
**4. Docker build fails**
|
| 674 |
+
```bash
|
| 675 |
+
docker build --no-cache -t unitycatalog-chatbot:latest .
|
| 676 |
+
```
|
| 677 |
+
|
| 678 |
+
**5. Tests fail in CI/CD**
|
| 679 |
+
- Tests use mocks and don't require credentials
|
| 680 |
+
- If failing, check Python version (3.9+) and pytest version
|
| 681 |
+
|
| 682 |
+
### Get Help
|
| 683 |
+
|
| 684 |
+
Check logs for detailed error messages:
|
| 685 |
+
```bash
|
| 686 |
+
# Local
|
| 687 |
+
python app.py # stdout
|
| 688 |
+
|
| 689 |
+
# Docker
|
| 690 |
+
docker logs <container-name>
|
| 691 |
+
|
| 692 |
+
# Kubernetes
|
| 693 |
+
kubectl logs <pod-name> -c app
|
| 694 |
+
```
|
| 695 |
+
|
| 696 |
+
---
|
| 697 |
+
|
| 698 |
+
## Part 7: Scaling & Maintenance
|
| 699 |
+
|
| 700 |
+
### Horizontal Scaling
|
| 701 |
+
|
| 702 |
+
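- **Docker Compose:** Set `deploy.replicas` for the service in docker-compose.yml, or run `docker-compose up -d --scale app=3` (remove the fixed host port mapping first so replicas do not conflict)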
|
| 703 |
+
- **Kubernetes:** `kubectl scale deployment unitycatalog-chatbot --replicas=5`
|
| 704 |
+
- **ECS:** Update desired task count in AWS Console
|
| 705 |
+
|
| 706 |
+
### Updates & Rollbacks
|
| 707 |
+
|
| 708 |
+
1. **Build new image:**
|
| 709 |
+
```bash
|
| 710 |
+
docker build -t unitycatalog-chatbot:v1.1.0 .
|
| 711 |
+
```
|
| 712 |
+
|
| 713 |
+
2. **Push to registry:**
|
| 714 |
+
```bash
|
| 715 |
+
docker push your-registry/unitycatalog-chatbot:v1.1.0
|
| 716 |
+
```
|
| 717 |
+
|
| 718 |
+
3. **Update deployment:**
|
| 719 |
+
```bash
|
| 720 |
+
# Kubernetes
|
| 721 |
+
kubectl set image deployment/unitycatalog-chatbot \
|
| 722 |
+
app=your-registry/unitycatalog-chatbot:v1.1.0
|
| 723 |
+
|
| 724 |
+
# ECS (update task definition version)
|
| 725 |
+
aws ecs update-service \
|
| 726 |
+
--cluster my-cluster \
|
| 727 |
+
--service unitycatalog-chatbot \
|
| 728 |
+
--task-definition unitycatalog-chatbot:2
|
| 729 |
+
```
|
| 730 |
+
|
| 731 |
+
4. **Rollback if needed:**
|
| 732 |
+
```bash
|
| 733 |
+
# Kubernetes
|
| 734 |
+
kubectl rollout undo deployment/unitycatalog-chatbot
|
| 735 |
+
|
| 736 |
+
# ECS
|
| 737 |
+
aws ecs update-service \
|
| 738 |
+
--cluster my-cluster \
|
| 739 |
+
--service unitycatalog-chatbot \
|
| 740 |
+
--task-definition unitycatalog-chatbot:1
|
| 741 |
+
```
|
| 742 |
+
|
| 743 |
+
---
|
| 744 |
+
|
| 745 |
+
## Summary
|
| 746 |
+
|
| 747 |
+
| Deployment Type | Complexity | Best For |
|
| 748 |
+
|---|---|---|
|
| 749 |
+
| **Local** | Easy | Development, testing |
|
| 750 |
+
| **Docker** | Medium | Single machine, CI/CD |
|
| 751 |
+
| **Kubernetes** | Hard | Enterprise, multi-region, auto-scaling |
|
| 752 |
+
| **ECS** | Medium | AWS-only deployments |
|
| 753 |
+
| **ACI** | Medium | Quick Azure deployments |
|
| 754 |
+
|
| 755 |
+
Choose based on your infrastructure and scaling needs.
|
Dockerfile
CHANGED
|
@@ -20,6 +20,8 @@ RUN pip install --no-cache-dir gunicorn
|
|
| 20 |
# Copy application files
|
| 21 |
COPY app.py .
|
| 22 |
COPY unity_catalog_service.py .
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# Create non-root user
|
| 25 |
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
|
|
@@ -28,9 +30,5 @@ USER appuser
|
|
| 28 |
# Expose port
|
| 29 |
EXPOSE 5000
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
|
| 33 |
-
CMD python -c "import requests; requests.get('http://localhost:5000/api/health')"
|
| 34 |
-
|
| 35 |
-
# Run with gunicorn
|
| 36 |
-
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "120", "app:app"]
|
|
|
|
| 20 |
# Copy application files
|
| 21 |
COPY app.py .
|
| 22 |
COPY unity_catalog_service.py .
|
| 23 |
+
COPY config.py .
|
| 24 |
+
COPY conftest.py .
|
| 25 |
|
| 26 |
# Create non-root user
|
| 27 |
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
|
|
|
|
| 30 |
# Expose port
|
| 31 |
EXPOSE 5000
|
| 32 |
|
| 33 |
+
# Run with gunicorn (production server)
|
| 34 |
+
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "2", "--timeout", "120", "--access-logfile", "-", "app:app"]
|
|
|
|
|
|
|
|
|
|
|
|
HF_DEPLOYMENT.md
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Deploy UnityCatalog-ChatBot on Hugging Face Spaces
|
| 2 |
+
|
| 3 |
+
Quick 5-minute deployment guide for Hugging Face Spaces.
|
| 4 |
+
|
| 5 |
+
## Prerequisites
|
| 6 |
+
- Hugging Face account (free signup: https://huggingface.co/join)
|
| 7 |
+
- Databricks credentials (host + token)
|
| 8 |
+
- Anthropic API key
|
| 9 |
+
|
| 10 |
+
## Quick Start
|
| 11 |
+
|
| 12 |
+
### 1️⃣ Create Space on Hugging Face
|
| 13 |
+
```bash
|
| 14 |
+
# Go to: https://huggingface.co/new-space
|
| 15 |
+
# - Name: unitycatalog-chatbot
|
| 16 |
+
# - Type: Space
|
| 17 |
+
# - SDK: Docker
|
| 18 |
+
# - Click "Create Space"
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
### 2️⃣ Clone the Space
|
| 22 |
+
```bash
|
| 23 |
+
git clone https://huggingface.co/spaces/YOUR_USERNAME/unitycatalog-chatbot
|
| 24 |
+
cd unitycatalog-chatbot
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
### 3️⃣ Copy Project Files
|
| 28 |
+
```bash
|
| 29 |
+
# Copy all files from UnityCatalog-ChatBot to your Space
|
| 30 |
+
cp -r ../UnityCatalog-ChatBot/* .
|
| 31 |
+
|
| 32 |
+
# Required files:
|
| 33 |
+
# - app.py
|
| 34 |
+
# - unity_catalog_service.py
|
| 35 |
+
# - config.py
|
| 36 |
+
# - requirements.txt
|
| 37 |
+
# - Dockerfile
|
| 38 |
+
# - README.md (created in step 4 below)
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
### 4️⃣ Add README for Space
|
| 42 |
+
Create `README.md`:
|
| 43 |
+
```markdown
|
| 44 |
+
---
|
| 45 |
+
title: Unity Catalog Chatbot
|
| 46 |
+
emoji: 💬
|
| 47 |
+
colorFrom: blue
|
| 48 |
+
colorTo: purple
|
| 49 |
+
sdk: docker
app_port: 5000
|
| 50 |
+
pinned: false
|
| 51 |
+
---
|
| 52 |
+
|
| 53 |
+
# Unity Catalog Chatbot
|
| 54 |
+
|
| 55 |
+
Chat interface for Databricks Unity Catalog management.
|
| 56 |
+
|
| 57 |
+
## Setup
|
| 58 |
+
Add secrets in Space settings:
|
| 59 |
+
- `DATABRICKS_HOST`
|
| 60 |
+
- `DATABRICKS_TOKEN`
|
| 61 |
+
- `ANTHROPIC_API_KEY`
|
| 62 |
+
|
| 63 |
+
Visit https://huggingface.co/spaces/YOUR_USERNAME/unitycatalog-chatbot
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
### 5️⃣ Push to Hugging Face
|
| 67 |
+
```bash
|
| 68 |
+
git add .
|
| 69 |
+
git commit -m "Deploy to HF Spaces"
|
| 70 |
+
git push
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
### 6️⃣ Add Secrets
|
| 74 |
+
1. Go to Space → **Settings** → **Secrets**
|
| 75 |
+
2. Add three secrets:
|
| 76 |
+
- `DATABRICKS_HOST` = `https://your-workspace.databricks.com`
|
| 77 |
+
- `DATABRICKS_TOKEN` = your PAT
|
| 78 |
+
- `ANTHROPIC_API_KEY` = your key
|
| 79 |
+
|
| 80 |
+
3. Space rebuilds automatically ✅
|
| 81 |
+
|
| 82 |
+
### 7️⃣ Access Your App
|
| 83 |
+
- URL: `https://huggingface.co/spaces/YOUR_USERNAME/unitycatalog-chatbot`
|
| 84 |
+
- API: `https://YOUR_USERNAME-unitycatalog-chatbot.hf.space/api/`
|
| 85 |
+
|
| 86 |
+
## Test Endpoints
|
| 87 |
+
|
| 88 |
+
```bash
|
| 89 |
+
# Health check
|
| 90 |
+
curl https://YOUR_USERNAME-unitycatalog-chatbot.hf.space/api/health
|
| 91 |
+
|
| 92 |
+
# List catalogs
|
| 93 |
+
curl https://YOUR_USERNAME-unitycatalog-chatbot.hf.space/api/catalogs
|
| 94 |
+
|
| 95 |
+
# Chat
|
| 96 |
+
curl -X POST https://YOUR_USERNAME-unitycatalog-chatbot.hf.space/api/chat \
|
| 97 |
+
-H "Content-Type: application/json" \
|
| 98 |
+
-d '{"message": "Create a catalog named demo"}'
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
## Common Issues
|
| 102 |
+
|
| 103 |
+
| Issue | Solution |
|
| 104 |
+
|-------|----------|
|
| 105 |
+
| Build fails | Check build logs in Settings. Verify Dockerfile exists. |
|
| 106 |
+
| App crashes | Check runtime logs. Ensure secrets are set. |
|
| 107 |
+
| API returns 500 | Credentials invalid. Test locally first. |
|
| 108 |
+
| "Port already in use" | App should auto-detect port. Check `app.py`. |
|
| 109 |
+
|
| 110 |
+
## Monitoring
|
| 111 |
+
|
| 112 |
+
View logs in Space:
|
| 113 |
+
1. **Settings** → **Build logs** (deployment)
|
| 114 |
+
2. **Settings** → **Runtime logs** (application)
|
| 115 |
+
|
| 116 |
+
## Upgrade Options
|
| 117 |
+
|
| 118 |
+
- **Free**: 2 CPU, shared GPU, variable uptime
|
| 119 |
+
- **Pro**: $50/month, dedicated resources, custom domain
|
| 120 |
+
|
| 121 |
+
---
|
| 122 |
+
|
| 123 |
+
## File Checklist
|
| 124 |
+
|
| 125 |
+
Before pushing, ensure you have:
|
| 126 |
+
|
| 127 |
+
- [ ] `app.py` - Flask server
|
| 128 |
+
- [ ] `unity_catalog_service.py` - UC operations
|
| 129 |
+
- [ ] `config.py` - Configuration management
|
| 130 |
+
- [ ] `requirements.txt` - Python dependencies
|
| 131 |
+
- [ ] `Dockerfile` - Container definition
|
| 132 |
+
- [ ] `README.md` - Space description (with metadata)
|
| 133 |
+
- [ ] `.gitignore` - Exclude `.env` and `__pycache__`
|
| 134 |
+
|
| 135 |
+
## Commands Quick Reference
|
| 136 |
+
|
| 137 |
+
```bash
|
| 138 |
+
# Clone space
|
| 139 |
+
git clone https://huggingface.co/spaces/username/unitycatalog-chatbot
|
| 140 |
+
|
| 141 |
+
# Push updates
|
| 142 |
+
git add . && git commit -m "Update" && git push
|
| 143 |
+
|
| 144 |
+
# View logs
|
| 145 |
+
# Go to: https://huggingface.co/spaces/username/unitycatalog-chatbot/settings
|
| 146 |
+
|
| 147 |
+
# Delete space (if needed)
|
| 148 |
+
# Go to: https://huggingface.co/spaces/username/unitycatalog-chatbot/settings → Delete
|
| 149 |
+
```
|
| 150 |
+
|
| 151 |
+
---
|
| 152 |
+
|
| 153 |
+
**Your app will be live in 2-5 minutes!** 🚀
|
HF_README.md
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Unity Catalog Chatbot
|
| 3 |
+
emoji: 💬
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
app_port: 5000
|
| 7 |
+
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# Unity Catalog Chatbot
|
| 12 |
+
|
| 13 |
+
A natural language chatbot for managing Databricks Unity Catalog powered by Claude AI.
|
| 14 |
+
|
| 15 |
+
## Features
|
| 16 |
+
|
| 17 |
+
✨ **Natural Language Interface**
|
| 18 |
+
- Ask questions in plain English
|
| 19 |
+
- Get instant responses from Claude AI
|
| 20 |
+
- No SQL knowledge required
|
| 21 |
+
|
| 22 |
+
🗂️ **Catalog Management**
|
| 23 |
+
- Create catalogs, schemas, and tables
|
| 24 |
+
- List objects across your workspace
|
| 25 |
+
- View table details and metadata
|
| 26 |
+
|
| 27 |
+
🔐 **Permission Management**
|
| 28 |
+
- Grant and revoke permissions
|
| 29 |
+
- Manage access control via chat
|
| 30 |
+
- Support for users and groups
|
| 31 |
+
|
| 32 |
+
🚀 **REST API**
|
| 33 |
+
- Full JSON API for integrations
|
| 34 |
+
- Health checks and monitoring
|
| 35 |
+
- Easy to embed in other apps
|
| 36 |
+
|
| 37 |
+
## Quick Start
|
| 38 |
+
|
| 39 |
+
### 1. Add Secrets
|
| 40 |
+
|
| 41 |
+
Go to **Settings → Secrets** and add:
|
| 42 |
+
|
| 43 |
+
```
|
| 44 |
+
DATABRICKS_HOST = https://your-workspace.databricks.com
|
| 45 |
+
DATABRICKS_TOKEN = dapi...your-token...
|
| 46 |
+
ANTHROPIC_API_KEY = sk-ant-...your-key...
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
### 2. Wait for Build
|
| 50 |
+
|
| 51 |
+
Space will auto-build (~2 min). Check **Settings → Build logs**.
|
| 52 |
+
|
| 53 |
+
### 3. Start Using
|
| 54 |
+
|
| 55 |
+
Once running, the app will be available at the Space URL.
|
| 56 |
+
|
| 57 |
+
### 4. API Endpoints
|
| 58 |
+
|
| 59 |
+
#### Health Check
|
| 60 |
+
```bash
|
| 61 |
+
GET /api/health
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
#### List Catalogs
|
| 65 |
+
```bash
|
| 66 |
+
GET /api/catalogs
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
#### List Schemas
|
| 70 |
+
```bash
|
| 71 |
+
GET /api/schemas/{catalog}
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
#### List Tables
|
| 75 |
+
```bash
|
| 76 |
+
GET /api/tables/{catalog}/{schema}
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
#### Chat (Main Endpoint)
|
| 80 |
+
```bash
|
| 81 |
+
POST /api/chat
|
| 82 |
+
Content-Type: application/json
|
| 83 |
+
|
| 84 |
+
{
|
| 85 |
+
"message": "Create a catalog named sales_data"
|
| 86 |
+
}
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
## Example Requests
|
| 90 |
+
|
| 91 |
+
### Create a Catalog
|
| 92 |
+
```bash
|
| 93 |
+
curl -X POST https://your-space-url/api/chat \
|
| 94 |
+
-H "Content-Type: application/json" \
|
| 95 |
+
-d '{"message": "Create a catalog named sales_data"}'
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
### Grant Permissions
|
| 99 |
+
```bash
|
| 100 |
+
curl -X POST https://your-space-url/api/chat \
|
| 101 |
+
-H "Content-Type: application/json" \
|
| 102 |
+
-d '{
|
| 103 |
+
"message": "Grant SELECT on sales_data.customers to data_analysts"
|
| 104 |
+
}'
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
### List Objects
|
| 108 |
+
```bash
|
| 109 |
+
curl https://your-space-url/api/catalogs
|
| 110 |
+
curl https://your-space-url/api/schemas/sales_data
|
| 111 |
+
curl https://your-space-url/api/tables/sales_data/analytics
|
| 112 |
+
```
|
| 113 |
+
|
| 114 |
+
## Supported Operations
|
| 115 |
+
|
| 116 |
+
- ✅ Create catalogs
|
| 117 |
+
- ✅ Create schemas
|
| 118 |
+
- ✅ Create tables
|
| 119 |
+
- ✅ Grant permissions
|
| 120 |
+
- ✅ Revoke permissions
|
| 121 |
+
- ✅ List catalogs/schemas/tables
|
| 122 |
+
- ✅ Show permissions
|
| 123 |
+
- ✅ Set object owner
|
| 124 |
+
- ✅ Get table details
|
| 125 |
+
- ✅ Execute SQL (when enabled)
|
| 126 |
+
|
| 127 |
+
## Requirements
|
| 128 |
+
|
| 129 |
+
- **Databricks** workspace with Unity Catalog enabled
|
| 130 |
+
- **Personal Access Token** (generate in Databricks)
|
| 131 |
+
- **Anthropic API Key** (get from https://console.anthropic.com)
|
| 132 |
+
|
| 133 |
+
## Architecture
|
| 134 |
+
|
| 135 |
+
```
|
| 136 |
+
┌─────────────────────┐
|
| 137 |
+
│ User / Client │
|
| 138 |
+
└──────────┬──────────┘
|
| 139 |
+
│
|
| 140 |
+
v
|
| 141 |
+
┌─────────────────────┐
|
| 142 |
+
│ Flask API Server │
|
| 143 |
+
│ (Port 5000) │
|
| 144 |
+
└──────────┬──────────┘
|
| 145 |
+
│
|
| 146 |
+
┌────┴──────┬────────────┐
|
| 147 |
+
v v v
|
| 148 |
+
Claude AI UC Service Config Manager
|
| 149 |
+
│ │ │
|
| 150 |
+
└────┬──────┴────────────┘
|
| 151 |
+
v
|
| 152 |
+
Databricks Unity Catalog
|
| 153 |
+
+ Anthropic API
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
## Local Development
|
| 157 |
+
|
| 158 |
+
```bash
|
| 159 |
+
# Clone repo
|
| 160 |
+
git clone <repo-url>
|
| 161 |
+
cd UnityCatalog-ChatBot
|
| 162 |
+
|
| 163 |
+
# Setup
|
| 164 |
+
python -m venv venv
|
| 165 |
+
source venv/bin/activate # or venv\Scripts\activate on Windows
|
| 166 |
+
pip install -r requirements.txt
|
| 167 |
+
|
| 168 |
+
# Configure
|
| 169 |
+
cp .env.example .env
|
| 170 |
+
# Edit .env with your credentials
|
| 171 |
+
|
| 172 |
+
# Run tests
|
| 173 |
+
pytest test_chatbot.py -v
|
| 174 |
+
|
| 175 |
+
# Run server
|
| 176 |
+
python app.py
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
## Docker
|
| 180 |
+
|
| 181 |
+
```bash
|
| 182 |
+
# Build
|
| 183 |
+
docker build -t unitycatalog-chatbot .
|
| 184 |
+
|
| 185 |
+
# Run
|
| 186 |
+
docker run -p 5000:5000 \
|
| 187 |
+
-e DATABRICKS_HOST="https://..." \
|
| 188 |
+
-e DATABRICKS_TOKEN="..." \
|
| 189 |
+
-e ANTHROPIC_API_KEY="..." \
|
| 190 |
+
unitycatalog-chatbot
|
| 191 |
+
```
|
| 192 |
+
|
| 193 |
+
## Troubleshooting
|
| 194 |
+
|
| 195 |
+
### Build Fails
|
| 196 |
+
- Check **Settings → Build logs**
|
| 197 |
+
- Ensure `Dockerfile` exists
|
| 198 |
+
- Verify `requirements.txt` syntax
|
| 199 |
+
|
| 200 |
+
### App Crashes
|
| 201 |
+
- Check **Settings → Runtime logs**
|
| 202 |
+
- Verify secrets are set correctly
|
| 203 |
+
- Test credentials locally first
|
| 204 |
+
|
| 205 |
+
### API Returns Error
|
| 206 |
+
- Confirm Databricks host URL is correct
|
| 207 |
+
- Check token hasn't expired
|
| 208 |
+
- Verify Anthropic API key is valid
|
| 209 |
+
|
| 210 |
+
### Slow Responses
|
| 211 |
+
- Databricks API latency
|
| 212 |
+
- Large catalog size (many objects)
|
| 213 |
+
- Network connectivity
|
| 214 |
+
|
| 215 |
+
## Security Notes
|
| 216 |
+
|
| 217 |
+
⚠️ **Never commit secrets to Git**
|
| 218 |
+
- Use Hugging Face Secrets feature
|
| 219 |
+
- Rotate tokens regularly
|
| 220 |
+
- Use IAM roles when possible
|
| 221 |
+
|
| 222 |
+
## Performance
|
| 223 |
+
|
| 224 |
+
- **Requests**: Up to 60/min (configurable)
|
| 225 |
+
- **Response time**: 2-5 seconds typical
|
| 226 |
+
- **Catalog size**: Tested with 1000+ objects
|
| 227 |
+
- **Concurrent users**: Limited by Space tier
|
| 228 |
+
|
| 229 |
+
## License
|
| 230 |
+
|
| 231 |
+
MIT
|
| 232 |
+
|
| 233 |
+
## Support
|
| 234 |
+
|
| 235 |
+
- GitHub Issues: [Link to repo]
|
| 236 |
+
- Documentation: See `/docs`
|
| 237 |
+
- Discord: [Link to community]
|
| 238 |
+
|
| 239 |
+
---
|
| 240 |
+
|
| 241 |
+
**Built with ❤️ using Flask, Claude, and Databricks**
|
| 242 |
+
|
| 243 |
+
*Hugging Face Spaces - Free hosting for ML apps*
|
app.py
CHANGED
|
@@ -14,9 +14,18 @@ from unity_catalog_service import UnityCatalogService
|
|
| 14 |
app = Flask(__name__)
|
| 15 |
CORS(app)
|
| 16 |
|
| 17 |
-
# Initialize services
|
| 18 |
-
uc_service =
|
| 19 |
-
claude_client =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# System prompt for Claude to parse Unity Catalog requests
|
| 22 |
SYSTEM_PROMPT = """You are an expert Unity Catalog assistant. Your role is to:
|
|
@@ -68,7 +77,8 @@ Always return valid JSON only, no additional text."""
|
|
| 68 |
def parse_with_claude(user_message: str) -> Dict:
|
| 69 |
"""Use Claude to parse complex natural language requests"""
|
| 70 |
try:
|
| 71 |
-
|
|
|
|
| 72 |
model="claude-sonnet-4-20250514",
|
| 73 |
max_tokens=1000,
|
| 74 |
system=SYSTEM_PROMPT,
|
|
@@ -100,12 +110,13 @@ def parse_with_claude(user_message: str) -> Dict:
|
|
| 100 |
|
| 101 |
def execute_intent(intent_data: Dict) -> Dict:
|
| 102 |
"""Execute the parsed intent using Unity Catalog service"""
|
|
|
|
| 103 |
intent = intent_data.get("intent")
|
| 104 |
params = intent_data.get("params", {})
|
| 105 |
|
| 106 |
try:
|
| 107 |
if intent == "createCatalog":
|
| 108 |
-
return
|
| 109 |
name=params.get("catalog"),
|
| 110 |
comment=params.get("comment")
|
| 111 |
)
|
|
@@ -115,7 +126,7 @@ def execute_intent(intent_data: Dict) -> Dict:
|
|
| 115 |
# If full path provided (e.g., "catalog.schema")
|
| 116 |
if not schema and catalog and '.' in catalog:
|
| 117 |
catalog, schema = catalog.split('.', 1)
|
| 118 |
-
return
|
| 119 |
catalog=catalog,
|
| 120 |
schema=schema,
|
| 121 |
comment=params.get("comment")
|
|
@@ -134,7 +145,7 @@ def execute_intent(intent_data: Dict) -> Dict:
|
|
| 134 |
elif len(parts) == 2 and catalog:
|
| 135 |
schema, table = parts
|
| 136 |
|
| 137 |
-
return
|
| 138 |
catalog=catalog,
|
| 139 |
schema=schema,
|
| 140 |
table=table,
|
|
@@ -154,7 +165,7 @@ def execute_intent(intent_data: Dict) -> Dict:
|
|
| 154 |
else:
|
| 155 |
securable_type = "TABLE"
|
| 156 |
|
| 157 |
-
return
|
| 158 |
principal=params.get("principal"),
|
| 159 |
privilege=params.get("privilege"),
|
| 160 |
securable_type=securable_type,
|
|
@@ -172,7 +183,7 @@ def execute_intent(intent_data: Dict) -> Dict:
|
|
| 172 |
else:
|
| 173 |
securable_type = "TABLE"
|
| 174 |
|
| 175 |
-
return
|
| 176 |
principal=params.get("principal"),
|
| 177 |
privilege=params.get("privilege"),
|
| 178 |
securable_type=securable_type,
|
|
@@ -180,13 +191,13 @@ def execute_intent(intent_data: Dict) -> Dict:
|
|
| 180 |
)
|
| 181 |
|
| 182 |
elif intent == "listCatalogs":
|
| 183 |
-
return
|
| 184 |
|
| 185 |
elif intent == "listSchemas":
|
| 186 |
-
return
|
| 187 |
|
| 188 |
elif intent == "listTables":
|
| 189 |
-
return
|
| 190 |
params.get("catalog"),
|
| 191 |
params.get("schema")
|
| 192 |
)
|
|
@@ -202,7 +213,7 @@ def execute_intent(intent_data: Dict) -> Dict:
|
|
| 202 |
else:
|
| 203 |
securable_type = "TABLE"
|
| 204 |
|
| 205 |
-
return
|
| 206 |
|
| 207 |
elif intent == "setOwner":
|
| 208 |
obj = params.get("object", "")
|
|
@@ -215,7 +226,7 @@ def execute_intent(intent_data: Dict) -> Dict:
|
|
| 215 |
else:
|
| 216 |
securable_type = "TABLE"
|
| 217 |
|
| 218 |
-
return
|
| 219 |
securable_type=securable_type,
|
| 220 |
securable_name=obj,
|
| 221 |
owner=params.get("owner")
|
|
@@ -224,7 +235,7 @@ def execute_intent(intent_data: Dict) -> Dict:
|
|
| 224 |
elif intent == "getTableDetails":
|
| 225 |
parts = params.get("table", "").split('.')
|
| 226 |
if len(parts) == 3:
|
| 227 |
-
return
|
| 228 |
else:
|
| 229 |
return {
|
| 230 |
"success": False,
|
|
@@ -315,21 +326,24 @@ def health():
|
|
| 315 |
@app.route('/api/catalogs', methods=['GET'])
|
| 316 |
def get_catalogs():
|
| 317 |
"""Get all catalogs"""
|
| 318 |
-
|
|
|
|
| 319 |
return jsonify(result)
|
| 320 |
|
| 321 |
|
| 322 |
@app.route('/api/schemas/<catalog>', methods=['GET'])
|
| 323 |
def get_schemas(catalog):
|
| 324 |
"""Get schemas in a catalog"""
|
| 325 |
-
|
|
|
|
| 326 |
return jsonify(result)
|
| 327 |
|
| 328 |
|
| 329 |
@app.route('/api/tables/<catalog>/<schema>', methods=['GET'])
|
| 330 |
def get_tables(catalog, schema):
|
| 331 |
"""Get tables in a schema"""
|
| 332 |
-
|
|
|
|
| 333 |
return jsonify(result)
|
| 334 |
|
| 335 |
|
|
@@ -341,7 +355,8 @@ def execute_sql():
|
|
| 341 |
sql = data.get('sql', '')
|
| 342 |
warehouse_id = data.get('warehouse_id')
|
| 343 |
|
| 344 |
-
|
|
|
|
| 345 |
return jsonify(result)
|
| 346 |
|
| 347 |
except Exception as e:
|
|
|
|
| 14 |
app = Flask(__name__)
|
| 15 |
CORS(app)
|
| 16 |
|
| 17 |
+
# Initialize services (lazy to allow mocking in tests)
|
| 18 |
+
uc_service = None
|
| 19 |
+
claude_client = None
|
| 20 |
+
|
| 21 |
+
def _init_services():
|
| 22 |
+
"""Lazy initialize services."""
|
| 23 |
+
global uc_service, claude_client
|
| 24 |
+
if uc_service is None:
|
| 25 |
+
uc_service = UnityCatalogService()
|
| 26 |
+
if claude_client is None:
|
| 27 |
+
claude_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
|
| 28 |
+
return uc_service, claude_client
|
| 29 |
|
| 30 |
# System prompt for Claude to parse Unity Catalog requests
|
| 31 |
SYSTEM_PROMPT = """You are an expert Unity Catalog assistant. Your role is to:
|
|
|
|
| 77 |
def parse_with_claude(user_message: str) -> Dict:
|
| 78 |
"""Use Claude to parse complex natural language requests"""
|
| 79 |
try:
|
| 80 |
+
_, client = _init_services() # Lazy init
|
| 81 |
+
message = client.messages.create(
|
| 82 |
model="claude-sonnet-4-20250514",
|
| 83 |
max_tokens=1000,
|
| 84 |
system=SYSTEM_PROMPT,
|
|
|
|
| 110 |
|
| 111 |
def execute_intent(intent_data: Dict) -> Dict:
|
| 112 |
"""Execute the parsed intent using Unity Catalog service"""
|
| 113 |
+
uc, _ = _init_services() # Lazy init
|
| 114 |
intent = intent_data.get("intent")
|
| 115 |
params = intent_data.get("params", {})
|
| 116 |
|
| 117 |
try:
|
| 118 |
if intent == "createCatalog":
|
| 119 |
+
return uc.create_catalog(
|
| 120 |
name=params.get("catalog"),
|
| 121 |
comment=params.get("comment")
|
| 122 |
)
|
|
|
|
| 126 |
# If full path provided (e.g., "catalog.schema")
|
| 127 |
if not schema and catalog and '.' in catalog:
|
| 128 |
catalog, schema = catalog.split('.', 1)
|
| 129 |
+
return uc.create_schema(
|
| 130 |
catalog=catalog,
|
| 131 |
schema=schema,
|
| 132 |
comment=params.get("comment")
|
|
|
|
| 145 |
elif len(parts) == 2 and catalog:
|
| 146 |
schema, table = parts
|
| 147 |
|
| 148 |
+
return uc.create_table(
|
| 149 |
catalog=catalog,
|
| 150 |
schema=schema,
|
| 151 |
table=table,
|
|
|
|
| 165 |
else:
|
| 166 |
securable_type = "TABLE"
|
| 167 |
|
| 168 |
+
return uc.grant_permission(
|
| 169 |
principal=params.get("principal"),
|
| 170 |
privilege=params.get("privilege"),
|
| 171 |
securable_type=securable_type,
|
|
|
|
| 183 |
else:
|
| 184 |
securable_type = "TABLE"
|
| 185 |
|
| 186 |
+
return uc.revoke_permission(
|
| 187 |
principal=params.get("principal"),
|
| 188 |
privilege=params.get("privilege"),
|
| 189 |
securable_type=securable_type,
|
|
|
|
| 191 |
)
|
| 192 |
|
| 193 |
elif intent == "listCatalogs":
|
| 194 |
+
return uc.list_catalogs()
|
| 195 |
|
| 196 |
elif intent == "listSchemas":
|
| 197 |
+
return uc.list_schemas(params.get("catalog"))
|
| 198 |
|
| 199 |
elif intent == "listTables":
|
| 200 |
+
return uc.list_tables(
|
| 201 |
params.get("catalog"),
|
| 202 |
params.get("schema")
|
| 203 |
)
|
|
|
|
| 213 |
else:
|
| 214 |
securable_type = "TABLE"
|
| 215 |
|
| 216 |
+
return uc.show_grants(securable_type, obj)
|
| 217 |
|
| 218 |
elif intent == "setOwner":
|
| 219 |
obj = params.get("object", "")
|
|
|
|
| 226 |
else:
|
| 227 |
securable_type = "TABLE"
|
| 228 |
|
| 229 |
+
return uc.set_owner(
|
| 230 |
securable_type=securable_type,
|
| 231 |
securable_name=obj,
|
| 232 |
owner=params.get("owner")
|
|
|
|
| 235 |
elif intent == "getTableDetails":
|
| 236 |
parts = params.get("table", "").split('.')
|
| 237 |
if len(parts) == 3:
|
| 238 |
+
return uc.get_table(parts[0], parts[1], parts[2])
|
| 239 |
else:
|
| 240 |
return {
|
| 241 |
"success": False,
|
|
|
|
| 326 |
@app.route('/api/catalogs', methods=['GET'])
|
| 327 |
def get_catalogs():
|
| 328 |
"""Get all catalogs"""
|
| 329 |
+
uc, _ = _init_services()
|
| 330 |
+
result = uc.list_catalogs()
|
| 331 |
return jsonify(result)
|
| 332 |
|
| 333 |
|
| 334 |
@app.route('/api/schemas/<catalog>', methods=['GET'])
|
| 335 |
def get_schemas(catalog):
|
| 336 |
"""Get schemas in a catalog"""
|
| 337 |
+
uc, _ = _init_services()
|
| 338 |
+
result = uc.list_schemas(catalog)
|
| 339 |
return jsonify(result)
|
| 340 |
|
| 341 |
|
| 342 |
@app.route('/api/tables/<catalog>/<schema>', methods=['GET'])
|
| 343 |
def get_tables(catalog, schema):
|
| 344 |
"""Get tables in a schema"""
|
| 345 |
+
uc, _ = _init_services()
|
| 346 |
+
result = uc.list_tables(catalog, schema)
|
| 347 |
return jsonify(result)
|
| 348 |
|
| 349 |
|
|
|
|
| 355 |
sql = data.get('sql', '')
|
| 356 |
warehouse_id = data.get('warehouse_id')
|
| 357 |
|
| 358 |
+
uc, _ = _init_services()
|
| 359 |
+
result = uc.execute_sql(sql, warehouse_id)
|
| 360 |
return jsonify(result)
|
| 361 |
|
| 362 |
except Exception as e:
|
conftest.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared pytest fixtures for offline testing."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from types import SimpleNamespace
|
| 5 |
+
from unittest.mock import MagicMock
|
| 6 |
+
|
| 7 |
+
import pytest
|
| 8 |
+
|
| 9 |
+
import app as app_module
|
| 10 |
+
import unity_catalog_service as uc_module
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@pytest.fixture(scope="function", autouse=True)
|
| 14 |
+
def dummy_env(monkeypatch):
|
| 15 |
+
"""Set safe defaults to satisfy config validation without real secrets."""
|
| 16 |
+
env_defaults = {
|
| 17 |
+
"DATABRICKS_HOST": "https://dummy",
|
| 18 |
+
"DATABRICKS_TOKEN": "dummytoken123",
|
| 19 |
+
"ANTHROPIC_API_KEY": "sk-ant-dummy",
|
| 20 |
+
}
|
| 21 |
+
for key, value in env_defaults.items():
|
| 22 |
+
monkeypatch.setenv(key, value)
|
| 23 |
+
yield
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@pytest.fixture
|
| 27 |
+
def workspace_client():
|
| 28 |
+
"""Provide a mocked Databricks WorkspaceClient surface."""
|
| 29 |
+
catalogs = MagicMock()
|
| 30 |
+
catalogs.create = MagicMock()
|
| 31 |
+
catalogs.list = MagicMock(return_value=[])
|
| 32 |
+
catalogs.get = MagicMock()
|
| 33 |
+
catalogs.delete = MagicMock()
|
| 34 |
+
|
| 35 |
+
schemas = MagicMock()
|
| 36 |
+
schemas.create = MagicMock()
|
| 37 |
+
schemas.list = MagicMock(return_value=[])
|
| 38 |
+
schemas.delete = MagicMock()
|
| 39 |
+
|
| 40 |
+
tables = MagicMock()
|
| 41 |
+
tables.create = MagicMock()
|
| 42 |
+
tables.list = MagicMock(return_value=[])
|
| 43 |
+
tables.get = MagicMock()
|
| 44 |
+
|
| 45 |
+
grants = MagicMock()
|
| 46 |
+
grants.update = MagicMock()
|
| 47 |
+
grants.get = MagicMock()
|
| 48 |
+
|
| 49 |
+
sql = MagicMock()
|
| 50 |
+
sql.execute = MagicMock(return_value=[])
|
| 51 |
+
|
| 52 |
+
return SimpleNamespace(
|
| 53 |
+
catalogs=catalogs,
|
| 54 |
+
schemas=schemas,
|
| 55 |
+
tables=tables,
|
| 56 |
+
grants=grants,
|
| 57 |
+
sql=sql,
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@pytest.fixture(autouse=True)
|
| 62 |
+
def patch_workspace_client(monkeypatch, workspace_client):
|
| 63 |
+
"""Force UnityCatalogService to use the shared workspace client mock."""
|
| 64 |
+
monkeypatch.setattr(uc_module, "WorkspaceClient", MagicMock(return_value=workspace_client))
|
| 65 |
+
yield
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
@pytest.fixture
|
| 69 |
+
def uc_service(workspace_client):
|
| 70 |
+
"""Real UnityCatalogService instance bound to the mocked workspace client."""
|
| 71 |
+
service = uc_module.UnityCatalogService(
|
| 72 |
+
workspace_url="https://dummy",
|
| 73 |
+
token="dummytoken123",
|
| 74 |
+
)
|
| 75 |
+
service.client = workspace_client
|
| 76 |
+
return service
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
@pytest.fixture
|
| 80 |
+
def claude_client_mock():
|
| 81 |
+
    """Mock Anthropic client matching the methods used in app.py."""
|
| 82 |
+
messages = MagicMock()
|
| 83 |
+
messages.create = MagicMock()
|
| 84 |
+
return SimpleNamespace(messages=messages)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
@pytest.fixture(autouse=True)
|
| 88 |
+
def patch_app_init(monkeypatch, uc_service, claude_client_mock):
|
| 89 |
+
"""Override app._init_services to return mocked services for tests."""
|
| 90 |
+
def mock_init_services():
|
| 91 |
+
return uc_service, claude_client_mock
|
| 92 |
+
|
| 93 |
+
monkeypatch.setattr(app_module, "_init_services", mock_init_services)
|
| 94 |
+
yield
|
| 95 |
+
|
requirements.txt
CHANGED
|
@@ -2,4 +2,4 @@ flask==3.0.0
|
|
| 2 |
flask-cors==4.0.0
|
| 3 |
databricks-sdk==0.18.0
|
| 4 |
anthropic==0.39.0
|
| 5 |
-
python-dotenv==1.0.0
|
|
|
|
| 2 |
flask-cors==4.0.0
|
| 3 |
databricks-sdk==0.18.0
|
| 4 |
anthropic==0.39.0
|
| 5 |
+
python-dotenv==1.0.0
gunicorn==21.2.0
|