the update
Browse files- .streamlit/secrets.toml.example +11 -0
- DEPLOYMENT.md +375 -0
- backend/__init__.py +2 -0
- backend/api.py +720 -0
- backend/requirements.txt +54 -0
- demo/backend_client.py +315 -0
- demo/rag_config.py +158 -7
- demo/state_manager.py +27 -4
- docs_connection.md +183 -0
.streamlit/secrets.toml.example
CHANGED
|
@@ -14,6 +14,17 @@
|
|
| 14 |
# Single user mode
|
| 15 |
password = "your-secure-password"
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Multi-user mode (uncomment to use):
|
| 18 |
# [auth.users]
|
| 19 |
# admin = "admin-password-here"
|
|
|
|
| 14 |
# Single user mode
|
| 15 |
password = "your-secure-password"
|
| 16 |
|
| 17 |
+
# ============================================================================
|
| 18 |
+
# Backend Server (Optional - for GPU processing)
|
| 19 |
+
# ============================================================================
|
| 20 |
+
# If you have a GPU server (e.g., Lytos), configure the backend URL here.
|
| 21 |
+
# The backend provides GPU-accelerated OCR, embeddings, and RAG processing.
|
| 22 |
+
# See DEPLOYMENT.md for setup instructions.
|
| 23 |
+
|
| 24 |
+
# BACKEND_URL = "https://your-gpu-server.com:8000"
|
| 25 |
+
# Or for local testing:
|
| 26 |
+
# BACKEND_URL = "http://localhost:8000"
|
| 27 |
+
|
| 28 |
# Multi-user mode (uncomment to use):
|
| 29 |
# [auth.users]
|
| 30 |
# admin = "admin-password-here"
|
DEPLOYMENT.md
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPARKNET Deployment Guide
|
| 2 |
+
|
| 3 |
+
## Architecture Overview
|
| 4 |
+
|
| 5 |
+
SPARKNET supports a hybrid deployment architecture:
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
┌─────────────────────────────┐ ┌─────────────────────────────┐
|
| 9 |
+
│ Streamlit Cloud │ │ GPU Server (Lytos) │
|
| 10 |
+
│ (Frontend/UI) │ HTTPS │ FastAPI Backend │
|
| 11 |
+
│ │ ◄─────► │ │
|
| 12 |
+
│ sparknet.streamlit.app │ API │ - PaddleOCR (GPU) │
|
| 13 |
+
│ │ │ - Document Processing │
|
| 14 |
+
│ - User Interface │ │ - RAG + Embeddings │
|
| 15 |
+
│ - Authentication │ │ - Ollama LLM │
|
| 16 |
+
│ - Cloud LLM fallback │ │ - ChromaDB Vector Store │
|
| 17 |
+
└─────────────────────────────┘ └─────────────────────────────┘
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
## Deployment Options
|
| 21 |
+
|
| 22 |
+
### Option 1: Full Stack on GPU Server (Recommended for Production)
|
| 23 |
+
|
| 24 |
+
Run both frontend and backend on Lytos with GPU acceleration.
|
| 25 |
+
|
| 26 |
+
### Option 2: Hybrid (Streamlit Cloud + GPU Backend)
|
| 27 |
+
|
| 28 |
+
- **Frontend**: Streamlit Cloud (free hosting, easy sharing)
|
| 29 |
+
- **Backend**: Lytos GPU server (full processing power)
|
| 30 |
+
|
| 31 |
+
### Option 3: Streamlit Cloud Only (Demo Mode)
|
| 32 |
+
|
| 33 |
+
- Uses cloud LLM providers (Groq, Gemini, etc.)
|
| 34 |
+
- Limited functionality (no OCR, no RAG indexing)
|
| 35 |
+
|
| 36 |
+
---
|
| 37 |
+
|
| 38 |
+
## Option 2: Hybrid Deployment (Recommended)
|
| 39 |
+
|
| 40 |
+
### Step 1: Setup Backend on Lytos (GPU Server)
|
| 41 |
+
|
| 42 |
+
#### 1.1 SSH into Lytos
|
| 43 |
+
```bash
|
| 44 |
+
ssh user@lytos.server.address
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
#### 1.2 Clone the repository
|
| 48 |
+
```bash
|
| 49 |
+
git clone https://github.com/your-repo/sparknet.git
|
| 50 |
+
cd sparknet
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
#### 1.3 Create virtual environment
|
| 54 |
+
```bash
|
| 55 |
+
python -m venv venv
|
| 56 |
+
source venv/bin/activate
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
#### 1.4 Install backend dependencies
|
| 60 |
+
```bash
|
| 61 |
+
pip install -r backend/requirements.txt
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
#### 1.5 Install Ollama (for LLM inference)
|
| 65 |
+
```bash
|
| 66 |
+
curl -fsSL https://ollama.com/install.sh | sh
|
| 67 |
+
|
| 68 |
+
# Pull required models
|
| 69 |
+
ollama pull llama3.2:latest
|
| 70 |
+
ollama pull nomic-embed-text
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
#### 1.6 Start the backend server
|
| 74 |
+
```bash
|
| 75 |
+
# Development mode
|
| 76 |
+
cd backend
|
| 77 |
+
uvicorn api:app --host 0.0.0.0 --port 8000 --reload
|
| 78 |
+
|
| 79 |
+
# Production mode (with multiple workers)
|
| 80 |
+
uvicorn api:app --host 0.0.0.0 --port 8000 --workers 4
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
#### 1.7 (Optional) Run with systemd for auto-restart
|
| 84 |
+
```bash
|
| 85 |
+
sudo nano /etc/systemd/system/sparknet-backend.service
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
Add:
|
| 89 |
+
```ini
|
| 90 |
+
[Unit]
|
| 91 |
+
Description=SPARKNET Backend API
|
| 92 |
+
After=network.target
|
| 93 |
+
|
| 94 |
+
[Service]
|
| 95 |
+
Type=simple
|
| 96 |
+
User=your-user
|
| 97 |
+
WorkingDirectory=/path/to/sparknet/backend
|
| 98 |
+
Environment=PATH=/path/to/sparknet/venv/bin
|
| 99 |
+
ExecStart=/path/to/sparknet/venv/bin/uvicorn api:app --host 0.0.0.0 --port 8000 --workers 4
|
| 100 |
+
Restart=always
|
| 101 |
+
RestartSec=10
|
| 102 |
+
|
| 103 |
+
[Install]
|
| 104 |
+
WantedBy=multi-user.target
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
Enable and start:
|
| 108 |
+
```bash
|
| 109 |
+
sudo systemctl enable sparknet-backend
|
| 110 |
+
sudo systemctl start sparknet-backend
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
#### 1.8 Configure firewall (allow port 8000)
|
| 114 |
+
```bash
|
| 115 |
+
sudo ufw allow 8000/tcp
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
#### 1.9 (Optional) Setup HTTPS with nginx
|
| 119 |
+
```bash
|
| 120 |
+
sudo apt install nginx certbot python3-certbot-nginx
|
| 121 |
+
|
| 122 |
+
sudo nano /etc/nginx/sites-available/sparknet
|
| 123 |
+
```
|
| 124 |
+
|
| 125 |
+
Add:
|
| 126 |
+
```nginx
|
| 127 |
+
server {
|
| 128 |
+
listen 80;
|
| 129 |
+
server_name api.sparknet.yourdomain.com;
|
| 130 |
+
|
| 131 |
+
location / {
|
| 132 |
+
proxy_pass http://127.0.0.1:8000;
|
| 133 |
+
proxy_http_version 1.1;
|
| 134 |
+
proxy_set_header Upgrade $http_upgrade;
|
| 135 |
+
proxy_set_header Connection 'upgrade';
|
| 136 |
+
proxy_set_header Host $host;
|
| 137 |
+
proxy_set_header X-Real-IP $remote_addr;
|
| 138 |
+
proxy_cache_bypass $http_upgrade;
|
| 139 |
+
proxy_read_timeout 300s;
|
| 140 |
+
proxy_connect_timeout 75s;
|
| 141 |
+
}
|
| 142 |
+
}
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
Enable and get SSL:
|
| 146 |
+
```bash
|
| 147 |
+
sudo ln -s /etc/nginx/sites-available/sparknet /etc/nginx/sites-enabled/
|
| 148 |
+
sudo certbot --nginx -d api.sparknet.yourdomain.com
|
| 149 |
+
sudo systemctl restart nginx
|
| 150 |
+
```
|
| 151 |
+
|
| 152 |
+
### Step 2: Configure Streamlit Cloud
|
| 153 |
+
|
| 154 |
+
#### 2.1 Update Streamlit secrets
|
| 155 |
+
|
| 156 |
+
In Streamlit Cloud dashboard → Settings → Secrets, add:
|
| 157 |
+
|
| 158 |
+
```toml
|
| 159 |
+
[auth]
|
| 160 |
+
password = "SPARKNET@2026"
|
| 161 |
+
|
| 162 |
+
# Backend URL (your Lytos server)
|
| 163 |
+
BACKEND_URL = "https://api.sparknet.yourdomain.com"
|
| 164 |
+
# Or without HTTPS:
|
| 165 |
+
# BACKEND_URL = "http://lytos-ip-address:8000"
|
| 166 |
+
|
| 167 |
+
# Fallback cloud providers (optional, used if backend unavailable)
|
| 168 |
+
GROQ_API_KEY = "your-groq-key"
|
| 169 |
+
GOOGLE_API_KEY = "your-google-key"
|
| 170 |
+
```
|
| 171 |
+
|
| 172 |
+
#### 2.2 Deploy to Streamlit Cloud
|
| 173 |
+
|
| 174 |
+
Push your code and Streamlit Cloud will auto-deploy:
|
| 175 |
+
```bash
|
| 176 |
+
git add .
|
| 177 |
+
git commit -m "Add backend support"
|
| 178 |
+
git push origin main
|
| 179 |
+
```
|
| 180 |
+
|
| 181 |
+
### Step 3: Verify Deployment
|
| 182 |
+
|
| 183 |
+
#### 3.1 Test backend directly
|
| 184 |
+
```bash
|
| 185 |
+
# Health check
|
| 186 |
+
curl https://api.sparknet.yourdomain.com/api/health
|
| 187 |
+
|
| 188 |
+
# System status
|
| 189 |
+
curl https://api.sparknet.yourdomain.com/api/status
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
+
#### 3.2 Test from Streamlit
|
| 193 |
+
Visit your Streamlit app and check:
|
| 194 |
+
- Status bar should show "Backend" instead of "Demo Mode"
|
| 195 |
+
- GPU indicator should appear
|
| 196 |
+
- Document processing should use full pipeline
|
| 197 |
+
|
| 198 |
+
---
|
| 199 |
+
|
| 200 |
+
## Backend API Endpoints
|
| 201 |
+
|
| 202 |
+
| Endpoint | Method | Description |
|
| 203 |
+
|----------|--------|-------------|
|
| 204 |
+
| `/api/health` | GET | Health check |
|
| 205 |
+
| `/api/status` | GET | System status (Ollama, GPU, RAG) |
|
| 206 |
+
| `/api/process` | POST | Process document (OCR, layout) |
|
| 207 |
+
| `/api/index` | POST | Index document to RAG |
|
| 208 |
+
| `/api/query` | POST | Query RAG system |
|
| 209 |
+
| `/api/search` | POST | Search similar chunks |
|
| 210 |
+
| `/api/documents` | GET | List indexed documents |
|
| 211 |
+
| `/api/documents/{id}` | DELETE | Delete document |
|
| 212 |
+
|
| 213 |
+
### API Documentation
|
| 214 |
+
|
| 215 |
+
Once backend is running, visit:
|
| 216 |
+
- Swagger UI: `http://lytos:8000/docs`
|
| 217 |
+
- ReDoc: `http://lytos:8000/redoc`
|
| 218 |
+
|
| 219 |
+
---
|
| 220 |
+
|
| 221 |
+
## Environment Variables
|
| 222 |
+
|
| 223 |
+
### Backend (Lytos)
|
| 224 |
+
|
| 225 |
+
```bash
|
| 226 |
+
# Optional: Configure Ollama host if not localhost
|
| 227 |
+
export OLLAMA_HOST=http://localhost:11434
|
| 228 |
+
|
| 229 |
+
# Optional: GPU device selection
|
| 230 |
+
export CUDA_VISIBLE_DEVICES=0
|
| 231 |
+
```
|
| 232 |
+
|
| 233 |
+
### Frontend (Streamlit)
|
| 234 |
+
|
| 235 |
+
Set in `secrets.toml` or Streamlit Cloud secrets:
|
| 236 |
+
|
| 237 |
+
```toml
|
| 238 |
+
# Required for hybrid mode
|
| 239 |
+
BACKEND_URL = "https://your-backend-url"
|
| 240 |
+
|
| 241 |
+
# Authentication
|
| 242 |
+
[auth]
|
| 243 |
+
password = "your-password"
|
| 244 |
+
|
| 245 |
+
# Fallback cloud providers
|
| 246 |
+
GROQ_API_KEY = "..."
|
| 247 |
+
GOOGLE_API_KEY = "..."
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
+
---
|
| 251 |
+
|
| 252 |
+
## Troubleshooting
|
| 253 |
+
|
| 254 |
+
### Backend not reachable
|
| 255 |
+
|
| 256 |
+
1. Check if backend is running:
|
| 257 |
+
```bash
|
| 258 |
+
curl http://localhost:8000/api/health
|
| 259 |
+
```
|
| 260 |
+
|
| 261 |
+
2. Check firewall:
|
| 262 |
+
```bash
|
| 263 |
+
sudo ufw status
|
| 264 |
+
```
|
| 265 |
+
|
| 266 |
+
3. Check nginx logs:
|
| 267 |
+
```bash
|
| 268 |
+
sudo tail -f /var/log/nginx/error.log
|
| 269 |
+
```
|
| 270 |
+
|
| 271 |
+
### GPU not detected
|
| 272 |
+
|
| 273 |
+
1. Check CUDA:
|
| 274 |
+
```bash
|
| 275 |
+
nvidia-smi
|
| 276 |
+
python -c "import torch; print(torch.cuda.is_available())"
|
| 277 |
+
```
|
| 278 |
+
|
| 279 |
+
2. Check PaddlePaddle GPU:
|
| 280 |
+
```bash
|
| 281 |
+
python -c "import paddle; print(paddle.device.is_compiled_with_cuda())"
|
| 282 |
+
```
|
| 283 |
+
|
| 284 |
+
### Ollama not working
|
| 285 |
+
|
| 286 |
+
1. Check Ollama status:
|
| 287 |
+
```bash
|
| 288 |
+
ollama list
|
| 289 |
+
curl http://localhost:11434/api/tags
|
| 290 |
+
```
|
| 291 |
+
|
| 292 |
+
2. Restart Ollama:
|
| 293 |
+
```bash
|
| 294 |
+
sudo systemctl restart ollama
|
| 295 |
+
```
|
| 296 |
+
|
| 297 |
+
### Document processing fails
|
| 298 |
+
|
| 299 |
+
1. Check backend logs:
|
| 300 |
+
```bash
|
| 301 |
+
journalctl -u sparknet-backend -f
|
| 302 |
+
```
|
| 303 |
+
|
| 304 |
+
2. Test processing directly:
|
| 305 |
+
```bash
|
| 306 |
+
curl -X POST http://localhost:8000/api/process \
|
| 307 |
+
-F "file=@test.pdf" \
|
| 308 |
+
-F "ocr_engine=paddleocr"
|
| 309 |
+
```
|
| 310 |
+
|
| 311 |
+
---
|
| 312 |
+
|
| 313 |
+
## Security Considerations
|
| 314 |
+
|
| 315 |
+
### Production Checklist
|
| 316 |
+
|
| 317 |
+
- [ ] Enable HTTPS for backend API
|
| 318 |
+
- [ ] Configure CORS properly (restrict origins)
|
| 319 |
+
- [ ] Use strong authentication password
|
| 320 |
+
- [ ] Enable rate limiting
|
| 321 |
+
- [ ] Set up monitoring and alerts
|
| 322 |
+
- [ ] Configure backup for ChromaDB data
|
| 323 |
+
- [ ] Review GDPR compliance for data handling
|
| 324 |
+
|
| 325 |
+
### CORS Configuration
|
| 326 |
+
|
| 327 |
+
In `backend/api.py`, update for production:
|
| 328 |
+
|
| 329 |
+
```python
|
| 330 |
+
app.add_middleware(
|
| 331 |
+
CORSMiddleware,
|
| 332 |
+
allow_origins=["https://sparknet.streamlit.app"], # Your Streamlit URL
|
| 333 |
+
allow_credentials=True,
|
| 334 |
+
allow_methods=["GET", "POST", "DELETE"],
|
| 335 |
+
allow_headers=["*"],
|
| 336 |
+
)
|
| 337 |
+
```
|
| 338 |
+
|
| 339 |
+
---
|
| 340 |
+
|
| 341 |
+
## Performance Tuning
|
| 342 |
+
|
| 343 |
+
### Backend Workers
|
| 344 |
+
|
| 345 |
+
Adjust based on CPU cores:
|
| 346 |
+
```bash
|
| 347 |
+
uvicorn api:app --workers $(nproc)
|
| 348 |
+
```
|
| 349 |
+
|
| 350 |
+
### GPU Memory
|
| 351 |
+
|
| 352 |
+
For large documents, monitor GPU memory:
|
| 353 |
+
```bash
|
| 354 |
+
watch -n 1 nvidia-smi
|
| 355 |
+
```
|
| 356 |
+
|
| 357 |
+
### ChromaDB Optimization
|
| 358 |
+
|
| 359 |
+
For large document collections:
|
| 360 |
+
```python
|
| 361 |
+
store_config = VectorStoreConfig(
|
| 362 |
+
persist_directory="data/sparknet_unified_rag",
|
| 363 |
+
collection_name="sparknet_documents",
|
| 364 |
+
similarity_threshold=0.0,
|
| 365 |
+
# Add indexing options for better performance
|
| 366 |
+
)
|
| 367 |
+
```
|
| 368 |
+
|
| 369 |
+
---
|
| 370 |
+
|
| 371 |
+
## Contact & Support
|
| 372 |
+
|
| 373 |
+
- **Project**: VISTA/Horizon EU
|
| 374 |
+
- **Framework**: SPARKNET - Strategic Patent Acceleration & Research Kinetics NETwork
|
| 375 |
+
- **Issues**: https://github.com/your-repo/sparknet/issues
|
backend/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPARKNET Backend API
|
| 2 |
+
# GPU-accelerated document processing service
|
backend/api.py
ADDED
|
@@ -0,0 +1,720 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SPARKNET Backend API - GPU-Accelerated Document Processing
|
| 3 |
+
|
| 4 |
+
This FastAPI service runs on a GPU server (e.g., Lytos) and provides:
|
| 5 |
+
- Document processing with PaddleOCR
|
| 6 |
+
- Layout detection
|
| 7 |
+
- RAG indexing and querying
|
| 8 |
+
- Embedding generation
|
| 9 |
+
- LLM inference via Ollama
|
| 10 |
+
|
| 11 |
+
Deploy this on your GPU server and connect Streamlit Cloud to it.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from fastapi import FastAPI, HTTPException, UploadFile, File, Form, BackgroundTasks
|
| 15 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 16 |
+
from pydantic import BaseModel, Field
|
| 17 |
+
from typing import Optional, List, Dict, Any
|
| 18 |
+
import hashlib
|
| 19 |
+
import tempfile
|
| 20 |
+
import os
|
| 21 |
+
import sys
|
| 22 |
+
from pathlib import Path
|
| 23 |
+
from datetime import datetime
|
| 24 |
+
import asyncio
|
| 25 |
+
|
| 26 |
+
# Add project root to path
|
| 27 |
+
PROJECT_ROOT = Path(__file__).parent.parent
|
| 28 |
+
sys.path.insert(0, str(PROJECT_ROOT))
|
| 29 |
+
|
| 30 |
+
app = FastAPI(
|
| 31 |
+
title="SPARKNET Backend API",
|
| 32 |
+
description="GPU-accelerated document processing for Technology Transfer Office automation",
|
| 33 |
+
version="1.0.0",
|
| 34 |
+
docs_url="/docs",
|
| 35 |
+
redoc_url="/redoc",
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
# CORS - Allow Streamlit Cloud to connect
|
| 39 |
+
app.add_middleware(
|
| 40 |
+
CORSMiddleware,
|
| 41 |
+
allow_origins=["*"], # Configure specific origins in production
|
| 42 |
+
allow_credentials=True,
|
| 43 |
+
allow_methods=["*"],
|
| 44 |
+
allow_headers=["*"],
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
# ============================================================================
|
| 48 |
+
# Pydantic Models
|
| 49 |
+
# ============================================================================
|
| 50 |
+
|
| 51 |
+
class HealthResponse(BaseModel):
|
| 52 |
+
status: str
|
| 53 |
+
timestamp: str
|
| 54 |
+
version: str = "1.0.0"
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class SystemStatus(BaseModel):
|
| 58 |
+
ollama_available: bool
|
| 59 |
+
ollama_models: List[str] = []
|
| 60 |
+
gpu_available: bool = False
|
| 61 |
+
gpu_name: Optional[str] = None
|
| 62 |
+
rag_ready: bool = False
|
| 63 |
+
indexed_chunks: int = 0
|
| 64 |
+
embedding_model: Optional[str] = None
|
| 65 |
+
llm_model: Optional[str] = None
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
class ProcessRequest(BaseModel):
|
| 69 |
+
filename: str
|
| 70 |
+
options: Dict[str, Any] = Field(default_factory=dict)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class ProcessResponse(BaseModel):
|
| 74 |
+
success: bool
|
| 75 |
+
doc_id: str
|
| 76 |
+
filename: str
|
| 77 |
+
raw_text: str = ""
|
| 78 |
+
chunks: List[Dict[str, Any]] = []
|
| 79 |
+
page_count: int = 0
|
| 80 |
+
ocr_regions: List[Dict[str, Any]] = []
|
| 81 |
+
layout_regions: List[Dict[str, Any]] = []
|
| 82 |
+
ocr_confidence: float = 0.0
|
| 83 |
+
layout_confidence: float = 0.0
|
| 84 |
+
processing_time: float = 0.0
|
| 85 |
+
error: Optional[str] = None
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class IndexRequest(BaseModel):
|
| 89 |
+
doc_id: str
|
| 90 |
+
text: str
|
| 91 |
+
chunks: List[Dict[str, Any]] = []
|
| 92 |
+
metadata: Dict[str, Any] = Field(default_factory=dict)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
class IndexResponse(BaseModel):
|
| 96 |
+
success: bool
|
| 97 |
+
doc_id: str
|
| 98 |
+
num_chunks: int = 0
|
| 99 |
+
error: Optional[str] = None
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
class QueryRequest(BaseModel):
|
| 103 |
+
question: str
|
| 104 |
+
filters: Optional[Dict[str, Any]] = None
|
| 105 |
+
top_k: int = 5
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class QueryResponse(BaseModel):
|
| 109 |
+
success: bool
|
| 110 |
+
answer: str = ""
|
| 111 |
+
sources: List[Dict[str, Any]] = []
|
| 112 |
+
confidence: float = 0.0
|
| 113 |
+
latency_ms: float = 0.0
|
| 114 |
+
validated: bool = False
|
| 115 |
+
error: Optional[str] = None
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
class SearchRequest(BaseModel):
|
| 119 |
+
query: str
|
| 120 |
+
top_k: int = 5
|
| 121 |
+
doc_filter: Optional[str] = None
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
class DocumentInfo(BaseModel):
|
| 125 |
+
doc_id: str
|
| 126 |
+
filename: str = ""
|
| 127 |
+
chunk_count: int = 0
|
| 128 |
+
indexed_at: Optional[str] = None
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# ============================================================================
|
| 132 |
+
# Global State
|
| 133 |
+
# ============================================================================
|
| 134 |
+
|
| 135 |
+
_rag_system = None
|
| 136 |
+
_processing_queue = {}
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def get_rag_system():
|
| 140 |
+
"""Initialize and return the RAG system."""
|
| 141 |
+
global _rag_system
|
| 142 |
+
|
| 143 |
+
if _rag_system is not None:
|
| 144 |
+
return _rag_system
|
| 145 |
+
|
| 146 |
+
try:
|
| 147 |
+
from src.rag.agentic import AgenticRAG, RAGConfig
|
| 148 |
+
from src.rag.store import get_vector_store, VectorStoreConfig, reset_vector_store
|
| 149 |
+
from src.rag.embeddings import get_embedding_adapter, EmbeddingConfig, reset_embedding_adapter
|
| 150 |
+
|
| 151 |
+
# Check Ollama
|
| 152 |
+
ollama_ok, models = check_ollama_sync()
|
| 153 |
+
if not ollama_ok:
|
| 154 |
+
return None
|
| 155 |
+
|
| 156 |
+
# Select models
|
| 157 |
+
EMBEDDING_MODELS = ["nomic-embed-text", "mxbai-embed-large:latest", "mxbai-embed-large"]
|
| 158 |
+
LLM_MODELS = ["llama3.2:latest", "llama3.1:8b", "mistral:latest", "qwen2.5:14b"]
|
| 159 |
+
|
| 160 |
+
embed_model = next((m for m in EMBEDDING_MODELS if m in models), EMBEDDING_MODELS[0])
|
| 161 |
+
llm_model = next((m for m in LLM_MODELS if m in models), LLM_MODELS[0])
|
| 162 |
+
|
| 163 |
+
# Reset singletons
|
| 164 |
+
reset_vector_store()
|
| 165 |
+
reset_embedding_adapter()
|
| 166 |
+
|
| 167 |
+
# Initialize embedding adapter
|
| 168 |
+
embed_config = EmbeddingConfig(
|
| 169 |
+
ollama_model=embed_model,
|
| 170 |
+
ollama_base_url="http://localhost:11434",
|
| 171 |
+
)
|
| 172 |
+
embedder = get_embedding_adapter(config=embed_config)
|
| 173 |
+
|
| 174 |
+
# Initialize vector store
|
| 175 |
+
store_config = VectorStoreConfig(
|
| 176 |
+
persist_directory="data/sparknet_unified_rag",
|
| 177 |
+
collection_name="sparknet_documents",
|
| 178 |
+
similarity_threshold=0.0,
|
| 179 |
+
)
|
| 180 |
+
store = get_vector_store(config=store_config)
|
| 181 |
+
|
| 182 |
+
# Initialize RAG config
|
| 183 |
+
rag_config = RAGConfig(
|
| 184 |
+
model=llm_model,
|
| 185 |
+
base_url="http://localhost:11434",
|
| 186 |
+
max_revision_attempts=1,
|
| 187 |
+
enable_query_planning=True,
|
| 188 |
+
enable_reranking=True,
|
| 189 |
+
enable_validation=True,
|
| 190 |
+
retrieval_top_k=10,
|
| 191 |
+
final_top_k=5,
|
| 192 |
+
min_confidence=0.3,
|
| 193 |
+
verbose=False,
|
| 194 |
+
)
|
| 195 |
+
|
| 196 |
+
# Initialize RAG system
|
| 197 |
+
rag = AgenticRAG(
|
| 198 |
+
config=rag_config,
|
| 199 |
+
vector_store=store,
|
| 200 |
+
embedding_adapter=embedder,
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
_rag_system = {
|
| 204 |
+
"rag": rag,
|
| 205 |
+
"store": store,
|
| 206 |
+
"embedder": embedder,
|
| 207 |
+
"embed_model": embed_model,
|
| 208 |
+
"llm_model": llm_model,
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
return _rag_system
|
| 212 |
+
|
| 213 |
+
except Exception as e:
|
| 214 |
+
print(f"RAG init error: {e}")
|
| 215 |
+
return None
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def check_ollama_sync():
|
| 219 |
+
"""Check Ollama availability synchronously."""
|
| 220 |
+
try:
|
| 221 |
+
import httpx
|
| 222 |
+
with httpx.Client(timeout=3.0) as client:
|
| 223 |
+
resp = client.get("http://localhost:11434/api/tags")
|
| 224 |
+
if resp.status_code == 200:
|
| 225 |
+
models = [m["name"] for m in resp.json().get("models", [])]
|
| 226 |
+
return True, models
|
| 227 |
+
except:
|
| 228 |
+
pass
|
| 229 |
+
return False, []
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
def check_gpu():
|
| 233 |
+
"""Check GPU availability."""
|
| 234 |
+
try:
|
| 235 |
+
import torch
|
| 236 |
+
if torch.cuda.is_available():
|
| 237 |
+
return True, torch.cuda.get_device_name(0)
|
| 238 |
+
except:
|
| 239 |
+
pass
|
| 240 |
+
return False, None
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# ============================================================================
|
| 244 |
+
# API Endpoints
|
| 245 |
+
# ============================================================================
|
| 246 |
+
|
| 247 |
+
@app.get("/", response_model=HealthResponse)
|
| 248 |
+
async def root():
|
| 249 |
+
"""Health check endpoint."""
|
| 250 |
+
return HealthResponse(
|
| 251 |
+
status="healthy",
|
| 252 |
+
timestamp=datetime.now().isoformat(),
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
@app.get("/api/health", response_model=HealthResponse)
|
| 257 |
+
async def health():
|
| 258 |
+
"""Health check endpoint."""
|
| 259 |
+
return HealthResponse(
|
| 260 |
+
status="healthy",
|
| 261 |
+
timestamp=datetime.now().isoformat(),
|
| 262 |
+
)
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
@app.get("/api/status", response_model=SystemStatus)
|
| 266 |
+
async def get_status():
|
| 267 |
+
"""Get system status including Ollama, GPU, and RAG availability."""
|
| 268 |
+
ollama_ok, models = check_ollama_sync()
|
| 269 |
+
gpu_ok, gpu_name = check_gpu()
|
| 270 |
+
|
| 271 |
+
rag = get_rag_system()
|
| 272 |
+
rag_ready = rag is not None
|
| 273 |
+
|
| 274 |
+
indexed_chunks = 0
|
| 275 |
+
embed_model = None
|
| 276 |
+
llm_model = None
|
| 277 |
+
|
| 278 |
+
if rag:
|
| 279 |
+
try:
|
| 280 |
+
indexed_chunks = rag["store"].count()
|
| 281 |
+
embed_model = rag.get("embed_model")
|
| 282 |
+
llm_model = rag.get("llm_model")
|
| 283 |
+
except:
|
| 284 |
+
pass
|
| 285 |
+
|
| 286 |
+
return SystemStatus(
|
| 287 |
+
ollama_available=ollama_ok,
|
| 288 |
+
ollama_models=models,
|
| 289 |
+
gpu_available=gpu_ok,
|
| 290 |
+
gpu_name=gpu_name,
|
| 291 |
+
rag_ready=rag_ready,
|
| 292 |
+
indexed_chunks=indexed_chunks,
|
| 293 |
+
embedding_model=embed_model,
|
| 294 |
+
llm_model=llm_model,
|
| 295 |
+
)
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
@app.post("/api/process", response_model=ProcessResponse)
async def process_document(
    file: UploadFile = File(...),
    ocr_engine: str = Form(default="paddleocr"),
    max_pages: int = Form(default=10),
    enable_layout: bool = Form(default=True),
    preserve_tables: bool = Form(default=True),
):
    """
    Process a document with OCR and layout detection.

    Uses the GPU-accelerated pipeline (PaddleOCR + layout analysis) when
    available, and falls back to plain PyMuPDF extraction when the pipeline
    raises for any reason.

    Args:
        file: Uploaded document (PDF, image, or text file).
        ocr_engine: OCR engine name (e.g. "paddleocr", "tesseract").
        max_pages: Maximum number of pages to process.
        enable_layout: Include layout regions in the response.
        preserve_tables: Keep table structure during chunking.

    Returns:
        ProcessResponse with extracted text, chunks, and region metadata.
    """
    import time
    start_time = time.time()

    # Read the upload fully into memory.
    file_bytes = await file.read()
    filename = file.filename

    # Generate a doc ID from filename + timestamp + short content hash.
    # Fix: the filename was previously a hard-coded placeholder string, so
    # the hashed key did not reflect the uploaded file at all.
    content_hash = hashlib.md5(file_bytes[:1000]).hexdigest()[:8]
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    doc_id = hashlib.md5(f"{filename}_{timestamp}_{content_hash}".encode()).hexdigest()[:12]

    # The pipeline expects a filesystem path, so persist to a temp file.
    suffix = Path(filename).suffix
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(file_bytes)
        tmp_path = tmp.name

    try:
        # Try the full document processing pipeline first.
        try:
            from src.document.pipeline.processor import DocumentProcessor, PipelineConfig
            from src.document.ocr import OCRConfig
            from src.document.layout import LayoutConfig
            from src.document.chunking.chunker import ChunkerConfig

            chunker_config = ChunkerConfig(
                preserve_table_structure=preserve_tables,
                detect_table_headers=True,
                chunk_tables=True,
                chunk_figures=True,
                include_captions=True,
            )

            layout_config = LayoutConfig(
                method="rule_based",
                detect_tables=True,
                detect_figures=True,
                detect_headers=True,
                detect_titles=True,
                detect_lists=True,
                min_confidence=0.3,
                heading_font_ratio=1.1,
            )

            config = PipelineConfig(
                ocr=OCRConfig(engine=ocr_engine),
                layout=layout_config,
                chunking=chunker_config,
                max_pages=max_pages,
                include_ocr_regions=True,
                include_layout_regions=enable_layout,
                generate_full_text=True,
            )

            processor = DocumentProcessor(config)
            processor.initialize()
            result = processor.process(tmp_path)

            # Convert pipeline objects into plain JSON-serializable dicts.
            chunks_list = [
                {
                    "chunk_id": chunk.chunk_id,
                    "text": chunk.text,
                    "page": chunk.page,
                    "chunk_type": chunk.chunk_type.value,
                    "confidence": chunk.confidence,
                    "bbox": chunk.bbox.to_xyxy() if chunk.bbox else None,
                }
                for chunk in result.chunks
            ]

            ocr_regions = [
                {
                    "text": region.text,
                    "confidence": region.confidence,
                    "page": region.page,
                    "bbox": region.bbox.to_xyxy() if region.bbox else None,
                }
                for region in result.ocr_regions
            ]

            layout_regions = [
                {
                    "id": region.id,
                    "type": region.type.value,
                    "confidence": region.confidence,
                    "page": region.page,
                    "bbox": region.bbox.to_xyxy() if region.bbox else None,
                }
                for region in result.layout_regions
            ]

            processing_time = time.time() - start_time

            return ProcessResponse(
                success=True,
                doc_id=doc_id,
                filename=filename,
                raw_text=result.full_text,
                chunks=chunks_list,
                page_count=result.metadata.num_pages,
                ocr_regions=ocr_regions,
                layout_regions=layout_regions,
                ocr_confidence=result.metadata.ocr_confidence_avg or 0.0,
                layout_confidence=result.metadata.layout_confidence_avg or 0.0,
                processing_time=processing_time,
            )

        except Exception as e:
            # Any pipeline failure falls back to simple PyMuPDF extraction.
            return await process_document_fallback(file_bytes, filename, doc_id, max_pages, str(e), start_time)

    finally:
        # Always remove the temp file, even when processing failed.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
async def process_document_fallback(
    file_bytes: bytes,
    filename: str,
    doc_id: str,
    max_pages: int,
    reason: str,
    start_time: float,
) -> ProcessResponse:
    """Fallback document processing using PyMuPDF (no OCR/layout pipeline).

    Extracts plain text from PDFs (or decodes .txt/.md files) and produces
    simple fixed-size overlapping chunks.

    Args:
        file_bytes: Raw document content.
        filename: Original filename (used only to infer the file type).
        doc_id: Pre-computed document identifier.
        max_pages: Maximum number of PDF pages to extract.
        reason: Why the full pipeline was skipped (reported in ``error``).
        start_time: Epoch seconds when processing began.

    Returns:
        ProcessResponse with extracted text and naive chunks.
    """
    import time

    text = ""
    page_count = 1
    suffix = Path(filename).suffix.lower()

    if suffix == ".pdf":
        try:
            import fitz
            import io
            pdf_stream = io.BytesIO(file_bytes)
            doc = fitz.open(stream=pdf_stream, filetype="pdf")
            page_count = len(doc)
            max_p = min(max_pages, page_count)

            text_parts = []
            for page_num in range(max_p):
                page = doc[page_num]
                text_parts.append(f"--- Page {page_num + 1} ---\n{page.get_text()}")
            text = "\n\n".join(text_parts)
            doc.close()
        except Exception as e:
            text = f"PDF extraction failed: {e}"
    elif suffix in (".txt", ".md"):
        try:
            text = file_bytes.decode("utf-8")
        except UnicodeDecodeError:
            # Fix: narrowed from a bare `except:` — only decoding errors
            # should trigger the latin-1 fallback.
            text = file_bytes.decode("latin-1", errors="ignore")
    else:
        text = f"Unsupported file type: {suffix}"

    # Naive fixed-size chunking with overlap.
    chunk_size = 500
    overlap = 50
    # Guard against a non-positive range step should the constants change.
    step = max(1, chunk_size - overlap)
    chunks = []

    for i in range(0, len(text), step):
        chunk_text = text[i:i + chunk_size]
        # Drop near-empty tail fragments.
        if len(chunk_text.strip()) > 20:
            chunks.append({
                "chunk_id": f"{doc_id}_chunk_{len(chunks)}",
                "text": chunk_text,
                "page": 0,
                "chunk_type": "text",
                "confidence": 0.9,
                "bbox": None,
            })

    processing_time = time.time() - start_time

    return ProcessResponse(
        success=True,
        doc_id=doc_id,
        filename=filename,
        raw_text=text,
        chunks=chunks,
        page_count=page_count,
        ocr_regions=[],
        layout_regions=[],
        ocr_confidence=0.9,
        layout_confidence=0.0,
        processing_time=processing_time,
        error=f"Fallback mode: {reason}",
    )
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
@app.post("/api/index", response_model=IndexResponse)
async def index_document(request: IndexRequest):
    """Index a document's chunks (with embeddings) into the RAG vector store.

    Accepts chunks either as dicts (``{"text": ..., "chunk_id": ..., "page": ...}``)
    or as plain strings.
    """
    rag = get_rag_system()

    if not rag:
        return IndexResponse(
            success=False,
            doc_id=request.doc_id,
            error="RAG system not available. Check Ollama status.",
        )

    try:
        store = rag["store"]
        embedder = rag["embedder"]

        chunk_dicts = []
        embeddings = []

        for i, chunk in enumerate(request.chunks):
            is_dict = isinstance(chunk, dict)
            chunk_text = chunk.get("text", "") if is_dict else str(chunk)

            # Skip near-empty chunks; they only add retrieval noise.
            if len(chunk_text.strip()) < 20:
                continue

            # Fix: only call .get() on dict chunks — plain-string chunks
            # previously raised AttributeError on these two lookups.
            if is_dict:
                chunk_id = chunk.get("chunk_id", f"{request.doc_id}_chunk_{i}")
                page = chunk.get("page", 0)
            else:
                chunk_id = f"{request.doc_id}_chunk_{i}"
                page = 0

            chunk_dicts.append({
                "chunk_id": chunk_id,
                "document_id": request.doc_id,
                "text": chunk_text,
                "page": page,
                "chunk_type": "text",
                "source_path": request.metadata.get("filename", ""),
                "sequence_index": i,
            })
            embeddings.append(embedder.embed_text(chunk_text))

        if not chunk_dicts:
            return IndexResponse(
                success=False,
                doc_id=request.doc_id,
                error="No valid chunks to index",
            )

        store.add_chunks(chunk_dicts, embeddings)

        return IndexResponse(
            success=True,
            doc_id=request.doc_id,
            num_chunks=len(chunk_dicts),
        )

    except Exception as e:
        return IndexResponse(
            success=False,
            doc_id=request.doc_id,
            error=str(e),
        )
|
| 563 |
+
|
| 564 |
+
|
| 565 |
+
@app.post("/api/query", response_model=QueryResponse)
async def query_rag(request: QueryRequest):
    """Answer a question via the RAG pipeline, returning cited sources."""
    import time
    started = time.time()

    rag = get_rag_system()

    if not rag:
        return QueryResponse(
            success=False,
            error="RAG system not available. Check Ollama status.",
        )

    try:
        response = rag["rag"].query(request.question, filters=request.filters)
        latency_ms = (time.time() - started) * 1000

        # Flatten citation objects into plain dicts, tolerating missing
        # attributes on older citation shapes.
        sources = []
        citations = getattr(response, "citations", None) or []
        for cite in citations:
            sources.append({
                "index": getattr(cite, "index", 0),
                "text_snippet": getattr(cite, "text_snippet", str(cite)),
                "relevance_score": getattr(cite, "relevance_score", 0.0),
                "document_id": getattr(cite, "document_id", ""),
                "page": getattr(cite, "page", 0),
            })

        return QueryResponse(
            success=True,
            answer=response.answer,
            sources=sources,
            confidence=response.confidence,
            latency_ms=latency_ms,
            validated=response.validated,
        )

    except Exception as e:
        return QueryResponse(
            success=False,
            error=str(e),
        )
|
| 608 |
+
|
| 609 |
+
|
| 610 |
+
@app.post("/api/search")
async def search_similar(request: SearchRequest):
    """Search the vector store for chunks similar to the query text."""
    rag = get_rag_system()

    if not rag:
        return {"success": False, "error": "RAG system not available", "results": []}

    try:
        # Embed the query once, then run a (optionally filtered) similarity search.
        query_embedding = rag["embedder"].embed_text(request.query)
        filters = {"document_id": request.doc_filter} if request.doc_filter else None

        hits = rag["store"].search(
            query_embedding=query_embedding,
            top_k=request.top_k,
            filters=filters,
        )

        formatted = []
        for hit in hits:
            formatted.append({
                "chunk_id": hit.chunk_id,
                "document_id": hit.document_id,
                "text": hit.text,
                "similarity": hit.similarity,
                "page": hit.page,
                "metadata": hit.metadata,
            })

        return {"success": True, "results": formatted}

    except Exception as e:
        return {"success": False, "error": str(e), "results": []}
|
| 651 |
+
|
| 652 |
+
|
| 653 |
+
@app.get("/api/documents", response_model=List[DocumentInfo])
async def list_documents():
    """List all indexed documents with their per-document chunk counts."""
    rag = get_rag_system()

    if not rag:
        return []

    try:
        collection = rag["store"]._collection

        results = collection.get(include=["metadatas"])
        metadatas = (results or {}).get("metadatas")
        if not metadatas:
            return []

        # Aggregate one entry per document_id, counting its chunks.
        doc_info = {}
        for meta in metadatas:
            doc_id = meta.get("document_id", "unknown")
            entry = doc_info.setdefault(doc_id, {
                "doc_id": doc_id,
                "filename": meta.get("source_path", ""),
                "chunk_count": 0,
            })
            entry["chunk_count"] += 1

        return [DocumentInfo(**info) for info in doc_info.values()]

    except Exception:
        # Any store error is surfaced as "no documents" to keep the UI simple.
        return []
|
| 684 |
+
|
| 685 |
+
|
| 686 |
+
@app.delete("/api/documents/{doc_id}")
async def delete_document(doc_id: str):
    """Remove every chunk belonging to a document from the index."""
    rag = get_rag_system()

    if not rag:
        return {"success": False, "error": "RAG system not available"}

    try:
        collection = rag["store"]._collection

        # Look up the chunk IDs attached to this document (IDs only).
        matches = collection.get(
            where={"document_id": doc_id},
            include=[]
        )

        chunk_ids = (matches or {}).get("ids")
        if not chunk_ids:
            return {"success": False, "error": "Document not found"}

        collection.delete(ids=chunk_ids)
        return {"success": True, "deleted_chunks": len(chunk_ids)}

    except Exception as e:
        return {"success": False, "error": str(e)}
|
| 712 |
+
|
| 713 |
+
|
| 714 |
+
# ============================================================================
# Run Server
# ============================================================================

if __name__ == "__main__":
    import uvicorn

    # Bind on all interfaces so the Streamlit frontend can reach the API.
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPARKNET Backend Requirements
|
| 2 |
+
# For GPU server (Lytos) deployment
|
| 3 |
+
|
| 4 |
+
# ==============================================================================
|
| 5 |
+
# API Framework
|
| 6 |
+
# ==============================================================================
|
| 7 |
+
fastapi>=0.104.0
|
| 8 |
+
uvicorn[standard]>=0.24.0
|
| 9 |
+
python-multipart>=0.0.6
|
| 10 |
+
|
| 11 |
+
# ==============================================================================
|
| 12 |
+
# Document Processing (GPU-accelerated)
|
| 13 |
+
# ==============================================================================
|
| 14 |
+
paddleocr>=2.7.0
|
| 15 |
+
paddlepaddle-gpu>=2.5.0 # Use paddlepaddle for CPU-only
|
| 16 |
+
|
| 17 |
+
# ==============================================================================
|
| 18 |
+
# PDF Processing
|
| 19 |
+
# ==============================================================================
|
| 20 |
+
pymupdf>=1.23.0
|
| 21 |
+
|
| 22 |
+
# ==============================================================================
|
| 23 |
+
# Vector Store & Embeddings
|
| 24 |
+
# ==============================================================================
|
| 25 |
+
chromadb>=0.4.0
|
| 26 |
+
sentence-transformers>=2.2.0
|
| 27 |
+
|
| 28 |
+
# ==============================================================================
|
| 29 |
+
# LangChain & LLM
|
| 30 |
+
# ==============================================================================
|
| 31 |
+
langchain>=0.1.0
|
| 32 |
+
langchain-community>=0.0.20
|
| 33 |
+
langchain-ollama>=0.0.1
|
| 34 |
+
ollama>=0.1.0
|
| 35 |
+
|
| 36 |
+
# ==============================================================================
|
| 37 |
+
# Data Handling
|
| 38 |
+
# ==============================================================================
|
| 39 |
+
pydantic>=2.0.0
|
| 40 |
+
pydantic-settings>=2.0.0
|
| 41 |
+
numpy>=1.24.0
|
| 42 |
+
httpx>=0.25.0
|
| 43 |
+
|
| 44 |
+
# ==============================================================================
|
| 45 |
+
# ML/Deep Learning
|
| 46 |
+
# ==============================================================================
|
| 47 |
+
torch>=2.0.0
|
| 48 |
+
torchvision>=0.15.0
|
| 49 |
+
|
| 50 |
+
# ==============================================================================
|
| 51 |
+
# Utilities
|
| 52 |
+
# ==============================================================================
|
| 53 |
+
loguru>=0.7.0
|
| 54 |
+
python-dotenv>=1.0.0
|
demo/backend_client.py
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SPARKNET Backend Client
|
| 3 |
+
|
| 4 |
+
Client for connecting Streamlit Cloud to the GPU backend server (Lytos).
|
| 5 |
+
Handles all API communication with the FastAPI backend.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import httpx
|
| 9 |
+
import streamlit as st
|
| 10 |
+
from typing import Optional, Dict, Any, List, Tuple
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
import os
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_backend_url() -> Optional[str]:
    """Return the backend base URL from Streamlit secrets or the environment.

    Lookup order:
        1. ``st.secrets["BACKEND_URL"]``
        2. ``st.secrets["backend"]["url"]``
        3. ``SPARKNET_BACKEND_URL`` environment variable

    Returns:
        The configured URL, or None when no backend is configured.
    """
    # Try Streamlit secrets first.
    try:
        if hasattr(st, 'secrets'):
            if "BACKEND_URL" in st.secrets:
                return st.secrets["BACKEND_URL"]
            if "backend" in st.secrets and "url" in st.secrets["backend"]:
                return st.secrets["backend"]["url"]
    except Exception:
        # Fix: narrowed from a bare `except:` (which also swallowed
        # SystemExit/KeyboardInterrupt). st.secrets raises when no secrets
        # file exists; fall through to the environment variable.
        pass
    # Fall back to environment.
    return os.environ.get("SPARKNET_BACKEND_URL")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def is_backend_configured() -> bool:
    """Return True when a backend URL can be resolved."""
    url = get_backend_url()
    return url is not None
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
@dataclass
class BackendResponse:
    """Generic backend response wrapper.

    Every client method returns one of these instead of raising, so callers
    can branch on ``success`` without try/except.
    """

    # Whether the backend call completed without error.
    success: bool
    # Parsed JSON payload; {} when the call failed.
    data: Dict[str, Any]
    # Human-readable error description when success is False, else None.
    error: Optional[str] = None
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class BackendClient:
    """
    Client for SPARKNET Backend API.

    Provides methods to:
    - Check backend health and status
    - Process documents (OCR, layout detection)
    - Index documents to RAG
    - Query RAG system
    - Search similar chunks

    All public methods return a BackendResponse instead of raising, so
    callers can branch on ``success``.
    """

    def __init__(self, base_url: Optional[str] = None, timeout: float = 120.0):
        # Resolve the URL at construction time; None means "not configured".
        self.base_url = base_url or get_backend_url()
        self.timeout = timeout
        self._client = None

    @property
    def is_configured(self) -> bool:
        """True when a backend URL is known."""
        return self.base_url is not None

    def _get_client(self) -> httpx.Client:
        """Lazily create (and then reuse) the underlying HTTP client."""
        if self._client is None:
            self._client = httpx.Client(
                base_url=self.base_url,
                timeout=self.timeout,
            )
        return self._client

    def close(self):
        """Close the underlying HTTP client, if one was created."""
        if self._client:
            self._client.close()
            self._client = None

    def _request(self, method: str, path: str, **kwargs) -> BackendResponse:
        """Perform one HTTP request and wrap the JSON result.

        Centralizes the configured-check / raise_for_status / error-wrapping
        pattern that every public method previously duplicated.
        """
        if not self.is_configured:
            return BackendResponse(False, {}, "Backend URL not configured")

        try:
            resp = self._get_client().request(method, path, **kwargs)
            resp.raise_for_status()
            return BackendResponse(True, resp.json())
        except Exception as e:
            return BackendResponse(False, {}, str(e))

    def health_check(self) -> BackendResponse:
        """Check if backend is healthy."""
        return self._request("GET", "/api/health")

    def get_status(self) -> BackendResponse:
        """Get backend system status."""
        return self._request("GET", "/api/status")

    def process_document(
        self,
        file_bytes: bytes,
        filename: str,
        ocr_engine: str = "paddleocr",
        max_pages: int = 10,
        enable_layout: bool = True,
        preserve_tables: bool = True,
    ) -> BackendResponse:
        """
        Process a document using the backend.

        Args:
            file_bytes: Document content as bytes
            filename: Original filename
            ocr_engine: OCR engine to use (paddleocr, tesseract)
            max_pages: Maximum pages to process
            enable_layout: Enable layout detection
            preserve_tables: Preserve table structure

        Returns:
            BackendResponse with processing results
        """
        files = {"file": (filename, file_bytes)}
        # Multipart form fields must be strings; booleans are lowercased to
        # match FastAPI's bool parsing.
        data = {
            "ocr_engine": ocr_engine,
            "max_pages": str(max_pages),
            "enable_layout": str(enable_layout).lower(),
            "preserve_tables": str(preserve_tables).lower(),
        }
        return self._request("POST", "/api/process", files=files, data=data)

    def index_document(
        self,
        doc_id: str,
        text: str,
        chunks: List[Dict[str, Any]],
        metadata: Optional[Dict[str, Any]] = None,
    ) -> BackendResponse:
        """
        Index a document into the RAG system.

        Args:
            doc_id: Document identifier
            text: Full document text
            chunks: List of chunk dictionaries
            metadata: Optional metadata

        Returns:
            BackendResponse with indexing results
        """
        payload = {
            "doc_id": doc_id,
            "text": text,
            "chunks": chunks,
            "metadata": metadata or {},
        }
        return self._request("POST", "/api/index", json=payload)

    def query(
        self,
        question: str,
        filters: Optional[Dict[str, Any]] = None,
        top_k: int = 5,
    ) -> BackendResponse:
        """
        Query the RAG system.

        Args:
            question: Query question
            filters: Optional filters (e.g., document_id)
            top_k: Number of results

        Returns:
            BackendResponse with answer and sources
        """
        payload = {
            "question": question,
            "filters": filters,
            "top_k": top_k,
        }
        return self._request("POST", "/api/query", json=payload)

    def search_similar(
        self,
        query: str,
        top_k: int = 5,
        doc_filter: Optional[str] = None,
    ) -> BackendResponse:
        """
        Search for similar chunks.

        Args:
            query: Search query
            top_k: Number of results
            doc_filter: Optional document ID filter

        Returns:
            BackendResponse with similar chunks
        """
        payload = {
            "query": query,
            "top_k": top_k,
            "doc_filter": doc_filter,
        }
        return self._request("POST", "/api/search", json=payload)

    def list_documents(self) -> BackendResponse:
        """List all indexed documents."""
        resp = self._request("GET", "/api/documents")
        if resp.success:
            # The endpoint returns a bare list; wrap it so `data` stays a dict.
            return BackendResponse(True, {"documents": resp.data})
        return resp

    def delete_document(self, doc_id: str) -> BackendResponse:
        """Delete a document from the index."""
        return self._request("DELETE", f"/api/documents/{doc_id}")
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
# Global client instance
|
| 282 |
+
_backend_client: Optional[BackendClient] = None  # module-level singleton


def get_backend_client() -> BackendClient:
    """Return the shared BackendClient, creating it on first use."""
    global _backend_client
    if _backend_client is None:
        _backend_client = BackendClient()
    return _backend_client
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
def check_backend_available() -> Tuple[bool, Dict[str, Any]]:
    """
    Check if backend is available and return status.

    Runs a lightweight health probe first, then fetches the full status.

    Returns:
        Tuple of (available, status_dict)
    """
    client = get_backend_client()

    # No URL configured at all.
    if not client.is_configured:
        return False, {"error": "Backend URL not configured"}

    # Health probe.
    health = client.health_check()
    if not health.success:
        return False, {"error": f"Backend not reachable: {health.error}"}

    # Full status.
    status = client.get_status()
    if status.success:
        return True, status.data
    return False, {"error": f"Failed to get status: {status.error}"}
|
demo/rag_config.py
CHANGED
|
@@ -4,9 +4,10 @@ Unified RAG Configuration for SPARKNET Demo
|
|
| 4 |
This module provides a single source of truth for RAG system configuration,
|
| 5 |
ensuring all demo pages use the same vector store, embeddings, and models.
|
| 6 |
|
| 7 |
-
Supports
|
| 8 |
-
1.
|
| 9 |
-
2.
|
|
|
|
| 10 |
"""
|
| 11 |
|
| 12 |
import streamlit as st
|
|
@@ -79,13 +80,30 @@ def check_cloud_providers():
|
|
| 79 |
return providers
|
| 80 |
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
@st.cache_resource
|
| 83 |
def get_unified_rag_system():
|
| 84 |
"""
|
| 85 |
Initialize and return the unified RAG system.
|
| 86 |
|
| 87 |
This is cached at the Streamlit level so all pages share the same instance.
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
"""
|
| 90 |
# Check for required dependencies first
|
| 91 |
try:
|
|
@@ -100,6 +118,25 @@ def get_unified_rag_system():
|
|
| 100 |
"mode": "error",
|
| 101 |
}
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
# Check Ollama availability
|
| 104 |
ollama_ok, available_models = check_ollama()
|
| 105 |
|
|
@@ -210,11 +247,23 @@ def get_store_stats():
|
|
| 210 |
"""Get current vector store statistics."""
|
| 211 |
system = get_unified_rag_system()
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
if system["mode"] == "cloud":
|
| 214 |
return {
|
| 215 |
"total_chunks": 0,
|
| 216 |
"status": "cloud",
|
| 217 |
-
"message": "Cloud mode - indexing requires Ollama",
|
| 218 |
}
|
| 219 |
|
| 220 |
if system["status"] != "ready":
|
|
@@ -235,8 +284,33 @@ def index_document(text: str, document_id: str, metadata: dict = None) -> dict:
|
|
| 235 |
"""Index a document into the unified RAG system."""
|
| 236 |
system = get_unified_rag_system()
|
| 237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
if system["mode"] == "cloud":
|
| 239 |
-
return {"success": False, "error": "Indexing requires Ollama", "num_chunks": 0}
|
| 240 |
|
| 241 |
if system["status"] != "ready":
|
| 242 |
return {"success": False, "error": system.get("error", "RAG not ready"), "num_chunks": 0}
|
|
@@ -256,6 +330,36 @@ def query_rag(question: str, filters: dict = None):
|
|
| 256 |
"""Query the unified RAG system."""
|
| 257 |
system = get_unified_rag_system()
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
if system["mode"] == "cloud":
|
| 260 |
# Use cloud LLM for Q&A
|
| 261 |
from llm_providers import generate_response
|
|
@@ -283,6 +387,27 @@ def clear_index():
|
|
| 283 |
def get_indexed_documents() -> list:
|
| 284 |
"""Get list of indexed document IDs from vector store."""
|
| 285 |
system = get_unified_rag_system()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
if system["status"] != "ready":
|
| 287 |
return []
|
| 288 |
|
|
@@ -344,6 +469,19 @@ def get_chunks_for_document(document_id: str) -> list:
|
|
| 344 |
def search_similar_chunks(query: str, top_k: int = 5, doc_filter: str = None):
|
| 345 |
"""Search for similar chunks with optional document filter."""
|
| 346 |
system = get_unified_rag_system()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
if system["status"] != "ready":
|
| 348 |
return []
|
| 349 |
|
|
@@ -430,8 +568,21 @@ def auto_index_processed_document(doc_id: str, text: str, chunks: list, metadata
|
|
| 430 |
"""
|
| 431 |
system = get_unified_rag_system()
|
| 432 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
if system["mode"] == "cloud":
|
| 434 |
-
return {"success": False, "error": "Indexing requires Ollama", "num_chunks": 0}
|
| 435 |
|
| 436 |
if system["status"] != "ready":
|
| 437 |
return {"success": False, "error": "RAG system not ready", "num_chunks": 0}
|
|
|
|
| 4 |
This module provides a single source of truth for RAG system configuration,
|
| 5 |
ensuring all demo pages use the same vector store, embeddings, and models.
|
| 6 |
|
| 7 |
+
Supports three deployment modes:
|
| 8 |
+
1. Backend API (GPU server like Lytos) - Full processing power
|
| 9 |
+
2. Local Ollama (for on-premise deployments)
|
| 10 |
+
3. Cloud LLM providers (for Streamlit Cloud without backend)
|
| 11 |
"""
|
| 12 |
|
| 13 |
import streamlit as st
|
|
|
|
| 80 |
return providers
|
| 81 |
|
| 82 |
|
| 83 |
+
def check_backend():
|
| 84 |
+
"""Check if backend API is available."""
|
| 85 |
+
try:
|
| 86 |
+
from backend_client import check_backend_available, get_backend_url
|
| 87 |
+
if get_backend_url():
|
| 88 |
+
available, status = check_backend_available()
|
| 89 |
+
return available, status
|
| 90 |
+
except:
|
| 91 |
+
pass
|
| 92 |
+
return False, {}
|
| 93 |
+
|
| 94 |
+
|
| 95 |
@st.cache_resource
|
| 96 |
def get_unified_rag_system():
|
| 97 |
"""
|
| 98 |
Initialize and return the unified RAG system.
|
| 99 |
|
| 100 |
This is cached at the Streamlit level so all pages share the same instance.
|
| 101 |
+
|
| 102 |
+
Priority:
|
| 103 |
+
1. Backend API (GPU server) - if BACKEND_URL is configured
|
| 104 |
+
2. Local Ollama - if running locally
|
| 105 |
+
3. Cloud LLM providers - if API keys configured
|
| 106 |
+
4. Demo mode - no backend available
|
| 107 |
"""
|
| 108 |
# Check for required dependencies first
|
| 109 |
try:
|
|
|
|
| 118 |
"mode": "error",
|
| 119 |
}
|
| 120 |
|
| 121 |
+
# Check backend API first (GPU server)
|
| 122 |
+
backend_ok, backend_status = check_backend()
|
| 123 |
+
if backend_ok:
|
| 124 |
+
return {
|
| 125 |
+
"status": "ready",
|
| 126 |
+
"error": None,
|
| 127 |
+
"rag": None, # Use backend API instead
|
| 128 |
+
"store": None,
|
| 129 |
+
"embedder": None,
|
| 130 |
+
"mode": "backend",
|
| 131 |
+
"backend_status": backend_status,
|
| 132 |
+
"ollama_available": backend_status.get("ollama_available", False),
|
| 133 |
+
"gpu_available": backend_status.get("gpu_available", False),
|
| 134 |
+
"gpu_name": backend_status.get("gpu_name"),
|
| 135 |
+
"embed_model": backend_status.get("embedding_model", "backend"),
|
| 136 |
+
"llm_model": backend_status.get("llm_model", "backend"),
|
| 137 |
+
"indexed_chunks": backend_status.get("indexed_chunks", 0),
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
# Check Ollama availability
|
| 141 |
ollama_ok, available_models = check_ollama()
|
| 142 |
|
|
|
|
| 247 |
"""Get current vector store statistics."""
|
| 248 |
system = get_unified_rag_system()
|
| 249 |
|
| 250 |
+
# Use backend status if available
|
| 251 |
+
if system["mode"] == "backend":
|
| 252 |
+
return {
|
| 253 |
+
"total_chunks": system.get("indexed_chunks", 0),
|
| 254 |
+
"status": "ready",
|
| 255 |
+
"mode": "backend",
|
| 256 |
+
"embed_model": system.get("embed_model", "backend"),
|
| 257 |
+
"llm_model": system.get("llm_model", "backend"),
|
| 258 |
+
"gpu_available": system.get("gpu_available", False),
|
| 259 |
+
"gpu_name": system.get("gpu_name"),
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
if system["mode"] == "cloud":
|
| 263 |
return {
|
| 264 |
"total_chunks": 0,
|
| 265 |
"status": "cloud",
|
| 266 |
+
"message": "Cloud mode - indexing requires backend or Ollama",
|
| 267 |
}
|
| 268 |
|
| 269 |
if system["status"] != "ready":
|
|
|
|
| 284 |
"""Index a document into the unified RAG system."""
|
| 285 |
system = get_unified_rag_system()
|
| 286 |
|
| 287 |
+
# Use backend API if available
|
| 288 |
+
if system["mode"] == "backend":
|
| 289 |
+
try:
|
| 290 |
+
from backend_client import get_backend_client
|
| 291 |
+
client = get_backend_client()
|
| 292 |
+
# Simple chunking for backend indexing
|
| 293 |
+
chunk_size = 500
|
| 294 |
+
overlap = 50
|
| 295 |
+
chunks = []
|
| 296 |
+
for i in range(0, len(text), chunk_size - overlap):
|
| 297 |
+
chunk_text = text[i:i + chunk_size]
|
| 298 |
+
if len(chunk_text.strip()) > 20:
|
| 299 |
+
chunks.append({
|
| 300 |
+
"chunk_id": f"{document_id}_chunk_{len(chunks)}",
|
| 301 |
+
"text": chunk_text,
|
| 302 |
+
"page": 0,
|
| 303 |
+
})
|
| 304 |
+
result = client.index_document(document_id, text, chunks, metadata)
|
| 305 |
+
if result.success:
|
| 306 |
+
return {"success": True, "num_chunks": result.data.get("num_chunks", 0), "error": None}
|
| 307 |
+
else:
|
| 308 |
+
return {"success": False, "error": result.error, "num_chunks": 0}
|
| 309 |
+
except Exception as e:
|
| 310 |
+
return {"success": False, "error": str(e), "num_chunks": 0}
|
| 311 |
+
|
| 312 |
if system["mode"] == "cloud":
|
| 313 |
+
return {"success": False, "error": "Indexing requires backend or Ollama", "num_chunks": 0}
|
| 314 |
|
| 315 |
if system["status"] != "ready":
|
| 316 |
return {"success": False, "error": system.get("error", "RAG not ready"), "num_chunks": 0}
|
|
|
|
| 330 |
"""Query the unified RAG system."""
|
| 331 |
system = get_unified_rag_system()
|
| 332 |
|
| 333 |
+
# Use backend API if available
|
| 334 |
+
if system["mode"] == "backend":
|
| 335 |
+
try:
|
| 336 |
+
from backend_client import get_backend_client
|
| 337 |
+
client = get_backend_client()
|
| 338 |
+
result = client.query(question, filters=filters)
|
| 339 |
+
if result.success:
|
| 340 |
+
data = result.data
|
| 341 |
+
# Create a response object-like dict
|
| 342 |
+
return type('RAGResponse', (), {
|
| 343 |
+
'answer': data.get('answer', ''),
|
| 344 |
+
'citations': [
|
| 345 |
+
type('Citation', (), {
|
| 346 |
+
'index': s.get('index', i+1),
|
| 347 |
+
'text_snippet': s.get('text_snippet', ''),
|
| 348 |
+
'relevance_score': s.get('relevance_score', 0),
|
| 349 |
+
'document_id': s.get('document_id', ''),
|
| 350 |
+
'page': s.get('page', 0),
|
| 351 |
+
})() for i, s in enumerate(data.get('sources', []))
|
| 352 |
+
],
|
| 353 |
+
'confidence': data.get('confidence', 0),
|
| 354 |
+
'latency_ms': data.get('latency_ms', 0),
|
| 355 |
+
'num_sources': len(data.get('sources', [])),
|
| 356 |
+
'validated': data.get('validated', False),
|
| 357 |
+
})(), None
|
| 358 |
+
else:
|
| 359 |
+
return None, result.error
|
| 360 |
+
except Exception as e:
|
| 361 |
+
return None, str(e)
|
| 362 |
+
|
| 363 |
if system["mode"] == "cloud":
|
| 364 |
# Use cloud LLM for Q&A
|
| 365 |
from llm_providers import generate_response
|
|
|
|
| 387 |
def get_indexed_documents() -> list:
|
| 388 |
"""Get list of indexed document IDs from vector store."""
|
| 389 |
system = get_unified_rag_system()
|
| 390 |
+
|
| 391 |
+
# Use backend API if available
|
| 392 |
+
if system["mode"] == "backend":
|
| 393 |
+
try:
|
| 394 |
+
from backend_client import get_backend_client
|
| 395 |
+
client = get_backend_client()
|
| 396 |
+
result = client.list_documents()
|
| 397 |
+
if result.success:
|
| 398 |
+
docs = result.data.get("documents", [])
|
| 399 |
+
return [
|
| 400 |
+
{
|
| 401 |
+
"document_id": d.get("doc_id", d.get("document_id", "")),
|
| 402 |
+
"source_path": d.get("filename", ""),
|
| 403 |
+
"chunk_count": d.get("chunk_count", 0),
|
| 404 |
+
}
|
| 405 |
+
for d in docs
|
| 406 |
+
]
|
| 407 |
+
except:
|
| 408 |
+
pass
|
| 409 |
+
return []
|
| 410 |
+
|
| 411 |
if system["status"] != "ready":
|
| 412 |
return []
|
| 413 |
|
|
|
|
| 469 |
def search_similar_chunks(query: str, top_k: int = 5, doc_filter: str = None):
|
| 470 |
"""Search for similar chunks with optional document filter."""
|
| 471 |
system = get_unified_rag_system()
|
| 472 |
+
|
| 473 |
+
# Use backend API if available
|
| 474 |
+
if system["mode"] == "backend":
|
| 475 |
+
try:
|
| 476 |
+
from backend_client import get_backend_client
|
| 477 |
+
client = get_backend_client()
|
| 478 |
+
result = client.search_similar(query, top_k, doc_filter)
|
| 479 |
+
if result.success:
|
| 480 |
+
return result.data.get("results", [])
|
| 481 |
+
except:
|
| 482 |
+
pass
|
| 483 |
+
return []
|
| 484 |
+
|
| 485 |
if system["status"] != "ready":
|
| 486 |
return []
|
| 487 |
|
|
|
|
| 568 |
"""
|
| 569 |
system = get_unified_rag_system()
|
| 570 |
|
| 571 |
+
# Use backend API if available
|
| 572 |
+
if system["mode"] == "backend":
|
| 573 |
+
try:
|
| 574 |
+
from backend_client import get_backend_client
|
| 575 |
+
client = get_backend_client()
|
| 576 |
+
result = client.index_document(doc_id, text, chunks, metadata)
|
| 577 |
+
if result.success:
|
| 578 |
+
return {"success": True, "num_chunks": result.data.get("num_chunks", 0), "error": None}
|
| 579 |
+
else:
|
| 580 |
+
return {"success": False, "error": result.error, "num_chunks": 0}
|
| 581 |
+
except Exception as e:
|
| 582 |
+
return {"success": False, "error": str(e), "num_chunks": 0}
|
| 583 |
+
|
| 584 |
if system["mode"] == "cloud":
|
| 585 |
+
return {"success": False, "error": "Indexing requires backend or Ollama", "num_chunks": 0}
|
| 586 |
|
| 587 |
if system["status"] != "ready":
|
| 588 |
return {"success": False, "error": "RAG system not ready", "num_chunks": 0}
|
demo/state_manager.py
CHANGED
|
@@ -661,6 +661,8 @@ def render_global_status_bar():
|
|
| 661 |
rag_status = rag_system["status"]
|
| 662 |
rag_mode = rag_system.get("mode", "error")
|
| 663 |
llm_model = rag_system.get("llm_model", "N/A")
|
|
|
|
|
|
|
| 664 |
except:
|
| 665 |
ollama_ok = False
|
| 666 |
cloud_providers = {}
|
|
@@ -668,12 +670,19 @@ def render_global_status_bar():
|
|
| 668 |
rag_mode = "error"
|
| 669 |
llm_model = "N/A"
|
| 670 |
models = []
|
|
|
|
|
|
|
| 671 |
|
| 672 |
# Status bar
|
| 673 |
cols = st.columns(6)
|
| 674 |
|
| 675 |
with cols[0]:
|
| 676 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
st.success(f"Ollama ({len(models)})")
|
| 678 |
elif cloud_providers:
|
| 679 |
st.info(f"Cloud ({len(cloud_providers)})")
|
|
@@ -682,7 +691,10 @@ def render_global_status_bar():
|
|
| 682 |
|
| 683 |
with cols[1]:
|
| 684 |
if rag_status == "ready":
|
| 685 |
-
|
|
|
|
|
|
|
|
|
|
| 686 |
elif rag_mode == "cloud":
|
| 687 |
st.info("Cloud LLM")
|
| 688 |
elif rag_mode == "demo":
|
|
@@ -691,7 +703,12 @@ def render_global_status_bar():
|
|
| 691 |
st.error("RAG Error")
|
| 692 |
|
| 693 |
with cols[2]:
|
| 694 |
-
if rag_mode == "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
provider_name = list(cloud_providers.keys())[0].title()
|
| 696 |
st.info(f"{provider_name}")
|
| 697 |
elif llm_model != "N/A":
|
|
@@ -703,7 +720,13 @@ def render_global_status_bar():
|
|
| 703 |
st.info(f"{summary['total_documents']} Docs")
|
| 704 |
|
| 705 |
with cols[4]:
|
| 706 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 707 |
st.success(f"{summary['total_indexed_chunks']} Chunks")
|
| 708 |
else:
|
| 709 |
st.warning("0 Chunks")
|
|
|
|
| 661 |
rag_status = rag_system["status"]
|
| 662 |
rag_mode = rag_system.get("mode", "error")
|
| 663 |
llm_model = rag_system.get("llm_model", "N/A")
|
| 664 |
+
gpu_available = rag_system.get("gpu_available", False)
|
| 665 |
+
gpu_name = rag_system.get("gpu_name", "")
|
| 666 |
except:
|
| 667 |
ollama_ok = False
|
| 668 |
cloud_providers = {}
|
|
|
|
| 670 |
rag_mode = "error"
|
| 671 |
llm_model = "N/A"
|
| 672 |
models = []
|
| 673 |
+
gpu_available = False
|
| 674 |
+
gpu_name = ""
|
| 675 |
|
| 676 |
# Status bar
|
| 677 |
cols = st.columns(6)
|
| 678 |
|
| 679 |
with cols[0]:
|
| 680 |
+
if rag_mode == "backend":
|
| 681 |
+
if gpu_available:
|
| 682 |
+
st.success(f"GPU Backend")
|
| 683 |
+
else:
|
| 684 |
+
st.success("Backend")
|
| 685 |
+
elif ollama_ok:
|
| 686 |
st.success(f"Ollama ({len(models)})")
|
| 687 |
elif cloud_providers:
|
| 688 |
st.info(f"Cloud ({len(cloud_providers)})")
|
|
|
|
| 691 |
|
| 692 |
with cols[1]:
|
| 693 |
if rag_status == "ready":
|
| 694 |
+
if rag_mode == "backend":
|
| 695 |
+
st.success("RAG (Backend)")
|
| 696 |
+
else:
|
| 697 |
+
st.success("RAG Ready")
|
| 698 |
elif rag_mode == "cloud":
|
| 699 |
st.info("Cloud LLM")
|
| 700 |
elif rag_mode == "demo":
|
|
|
|
| 703 |
st.error("RAG Error")
|
| 704 |
|
| 705 |
with cols[2]:
|
| 706 |
+
if rag_mode == "backend":
|
| 707 |
+
if gpu_name:
|
| 708 |
+
st.info(f"{gpu_name[:12]}")
|
| 709 |
+
else:
|
| 710 |
+
st.info(f"{llm_model.split(':')[0] if llm_model else 'Backend'}")
|
| 711 |
+
elif rag_mode == "cloud" and cloud_providers:
|
| 712 |
provider_name = list(cloud_providers.keys())[0].title()
|
| 713 |
st.info(f"{provider_name}")
|
| 714 |
elif llm_model != "N/A":
|
|
|
|
| 720 |
st.info(f"{summary['total_documents']} Docs")
|
| 721 |
|
| 722 |
with cols[4]:
|
| 723 |
+
if rag_mode == "backend":
|
| 724 |
+
indexed = rag_system.get("indexed_chunks", 0)
|
| 725 |
+
if indexed > 0:
|
| 726 |
+
st.success(f"{indexed} Chunks")
|
| 727 |
+
else:
|
| 728 |
+
st.info("0 Chunks")
|
| 729 |
+
elif summary['indexed_documents'] > 0:
|
| 730 |
st.success(f"{summary['total_indexed_chunks']} Chunks")
|
| 731 |
else:
|
| 732 |
st.warning("0 Chunks")
|
docs_connection.md
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPARKNET Deployment Architecture
|
| 2 |
+
|
| 3 |
+
## Quick Answer
|
| 4 |
+
|
| 5 |
+
**For Streamlit Cloud:** Push to **GitHub only** (`git push origin main`), then reboot the app.
|
| 6 |
+
|
| 7 |
+
**For Hugging Face Spaces:** Push to **Hugging Face only** (`git push hf main`).
|
| 8 |
+
|
| 9 |
+
They are **independent deployments** - you choose which platform to use.
|
| 10 |
+
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
## Architecture Overview
|
| 14 |
+
|
| 15 |
+
```
|
| 16 |
+
+------------------+ +-------------------+
|
| 17 |
+
| Your Code | | Lytos Server |
|
| 18 |
+
| (Local/Git) | | 172.24.50.21 |
|
| 19 |
+
+--------+---------+ +---------+---------+
|
| 20 |
+
| |
|
| 21 |
+
| | Backend API
|
| 22 |
+
+----+----+ | (port 8000)
|
| 23 |
+
| | |
|
| 24 |
+
v v v
|
| 25 |
+
+-------+ +--------+ +---------------+
|
| 26 |
+
|GitHub | |Hugging | | localtunnel |
|
| 27 |
+
| | |Face | | (public URL) |
|
| 28 |
+
+---+---+ +---+----+ +-------+-------+
|
| 29 |
+
| | |
|
| 30 |
+
| | |
|
| 31 |
+
v v |
|
| 32 |
+
+----------+ +-----------+ |
|
| 33 |
+
|Streamlit | |HF Spaces |<------------+
|
| 34 |
+
|Cloud | | | Backend calls
|
| 35 |
+
+----------+ +-----------+
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
---
|
| 39 |
+
|
| 40 |
+
## Platform Comparison
|
| 41 |
+
|
| 42 |
+
| Feature | Streamlit Cloud | Hugging Face Spaces |
|
| 43 |
+
|---------|-----------------|---------------------|
|
| 44 |
+
| **Source** | GitHub repo | HF repo (or GitHub) |
|
| 45 |
+
| **Push command** | `git push origin main` | `git push hf main` |
|
| 46 |
+
| **Auto-rebuild** | Yes (on push) | Yes (on push) |
|
| 47 |
+
| **Secrets** | Dashboard > Settings > Secrets | Settings > Variables |
|
| 48 |
+
| **Free tier** | Yes (limited resources) | Yes (limited resources) |
|
| 49 |
+
| **Custom domain** | Premium only | Premium only |
|
| 50 |
+
| **GPU support** | No | Yes (paid) |
|
| 51 |
+
|
| 52 |
+
---
|
| 53 |
+
|
| 54 |
+
## Your Current Setup
|
| 55 |
+
|
| 56 |
+
### Git Remotes
|
| 57 |
+
```bash
|
| 58 |
+
origin -> github.com:MHHamdan/SPARKNET.git # For Streamlit Cloud
|
| 59 |
+
hf -> hf.co:spaces/mhamdan/SPARKNET.git # For Hugging Face Spaces
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
### Deployment URLs
|
| 63 |
+
- **Streamlit Cloud:** `https://mhhamdan-sparknet.streamlit.app`
|
| 64 |
+
- **Hugging Face:** `https://huggingface.co/spaces/mhamdan/SPARKNET`
|
| 65 |
+
|
| 66 |
+
### Backend (Lytos GPU Server)
|
| 67 |
+
- **Internal:** `http://172.24.50.21:8000`
|
| 68 |
+
- **Public (via tunnel):** `https://selfish-crab-86.loca.lt`
|
| 69 |
+
|
| 70 |
+
---
|
| 71 |
+
|
| 72 |
+
## How to Deploy
|
| 73 |
+
|
| 74 |
+
### Option 1: Streamlit Cloud (Recommended)
|
| 75 |
+
|
| 76 |
+
```bash
|
| 77 |
+
# 1. Make changes locally
|
| 78 |
+
# 2. Commit
|
| 79 |
+
git add .
|
| 80 |
+
git commit -m "Your message"
|
| 81 |
+
|
| 82 |
+
# 3. Push to GitHub
|
| 83 |
+
git push origin main
|
| 84 |
+
|
| 85 |
+
# 4. Streamlit Cloud auto-rebuilds (or manually reboot in dashboard)
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
**Secrets location:** https://share.streamlit.io > Your App > Settings > Secrets
|
| 89 |
+
|
| 90 |
+
### Option 2: Hugging Face Spaces
|
| 91 |
+
|
| 92 |
+
```bash
|
| 93 |
+
# 1. Make changes locally
|
| 94 |
+
# 2. Commit
|
| 95 |
+
git add .
|
| 96 |
+
git commit -m "Your message"
|
| 97 |
+
|
| 98 |
+
# 3. Push to Hugging Face
|
| 99 |
+
git push hf main
|
| 100 |
+
|
| 101 |
+
# 4. HF Spaces auto-rebuilds
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
**Secrets location:** https://huggingface.co/spaces/mhamdan/SPARKNET/settings
|
| 105 |
+
|
| 106 |
+
---
|
| 107 |
+
|
| 108 |
+
## Keeping Both in Sync
|
| 109 |
+
|
| 110 |
+
If you want both platforms updated:
|
| 111 |
+
|
| 112 |
+
```bash
|
| 113 |
+
git push origin main && git push hf main
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
Or push to both at once:
|
| 117 |
+
```bash
|
| 118 |
+
git remote add all git@github.com:MHHamdan/SPARKNET.git
|
| 119 |
+
git remote set-url --add all git@hf.co:spaces/mhamdan/SPARKNET.git
|
| 120 |
+
git push all main
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
---
|
| 124 |
+
|
| 125 |
+
## Backend Connection Flow
|
| 126 |
+
|
| 127 |
+
```
|
| 128 |
+
User Browser
|
| 129 |
+
|
|
| 130 |
+
v
|
| 131 |
+
Streamlit Cloud (frontend)
|
| 132 |
+
|
|
| 133 |
+
| HTTP requests to BACKEND_URL
|
| 134 |
+
v
|
| 135 |
+
localtunnel (https://selfish-crab-86.loca.lt)
|
| 136 |
+
|
|
| 137 |
+
| tunnels to
|
| 138 |
+
v
|
| 139 |
+
Lytos Server (172.24.50.21:8000)
|
| 140 |
+
|
|
| 141 |
+
| processes with
|
| 142 |
+
v
|
| 143 |
+
PaddleOCR + Ollama + GPU
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
---
|
| 147 |
+
|
| 148 |
+
## Required Secrets (Streamlit Cloud)
|
| 149 |
+
|
| 150 |
+
```toml
|
| 151 |
+
[auth]
|
| 152 |
+
password = "SPARKNET@2026"
|
| 153 |
+
|
| 154 |
+
BACKEND_URL = "https://selfish-crab-86.loca.lt"
|
| 155 |
+
|
| 156 |
+
GROQ_API_KEY = "your-key"
|
| 157 |
+
HF_TOKEN = "your-token"
|
| 158 |
+
GOOGLE_API_KEY = "your-key"
|
| 159 |
+
OPENROUTER_API_KEY = "your-key"
|
| 160 |
+
MISTRAL_API_KEY = "your-key"
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
---
|
| 164 |
+
|
| 165 |
+
## Troubleshooting
|
| 166 |
+
|
| 167 |
+
| Issue | Solution |
|
| 168 |
+
|-------|----------|
|
| 169 |
+
| Changes not appearing | Reboot app in Streamlit dashboard |
|
| 170 |
+
| Backend connection failed | Check if localtunnel is running (`screen -r lt-tunnel`) |
|
| 171 |
+
| Tunnel URL changed | Update `BACKEND_URL` in Streamlit secrets |
|
| 172 |
+
| PaddleOCR warning | Normal on Streamlit Cloud - backend handles OCR |
|
| 173 |
+
|
| 174 |
+
---
|
| 175 |
+
|
| 176 |
+
## Screen Sessions on Lytos
|
| 177 |
+
|
| 178 |
+
```bash
|
| 179 |
+
screen -ls # List sessions
|
| 180 |
+
screen -r sparknet-backend # Attach to backend
|
| 181 |
+
screen -r lt-tunnel # Attach to tunnel
|
| 182 |
+
screen -r ollama # Attach to Ollama
|
| 183 |
+
```
|