AnuragShirke committed on
Commit
4cab845
·
1 Parent(s): edd9bd7

Final Commit for Hosting Deployment

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.cloudrun.template +0 -62
  2. .gitignore +215 -1
  3. CLOUDRUN_DEPLOYMENT.md +0 -350
  4. DEPLOYMENT_AUTOMATION.md +0 -297
  5. DOCUMENTATION_INDEX.md +0 -239
  6. Dockerfile.railway +38 -0
  7. Dockerfile.simple +15 -0
  8. ENVIRONMENT_CONFIGURATION.md +0 -882
  9. FAQ.md +0 -747
  10. FLY_DEPLOYMENT.md +0 -642
  11. PERFORMANCE_OPTIMIZATION.md +0 -1295
  12. SUMMARY.md +0 -129
  13. TROUBLESHOOTING.md +0 -894
  14. cloudbuild.yaml +0 -146
  15. cloudrun/backend-service.yaml +0 -88
  16. cloudrun/cloudrun-config.yaml +0 -95
  17. cloudrun/cloudsql-config.yaml +0 -61
  18. cloudrun/frontend-service.yaml +0 -55
  19. cloudrun/iam-config.yaml +0 -84
  20. cloudrun/qdrant-service.yaml +0 -61
  21. cloudrun/secrets-config.yaml +0 -34
  22. create-test-user.py +73 -0
  23. deploy-cloudrun.sh +0 -422
  24. deploy-railway.sh +30 -44
  25. deploy.sh +0 -549
  26. main-simple.py +47 -0
  27. planning.md +0 -116
  28. rag-quest-hub/.env.vercel +1 -1
  29. rag-quest-hub/.gitignore +138 -0
  30. rag-quest-hub/api/auth/jwt/login.js +0 -101
  31. rag-quest-hub/api/auth/register.js +0 -99
  32. rag-quest-hub/api/health.js +0 -191
  33. rag-quest-hub/api/package.json +0 -15
  34. rag-quest-hub/api/query.js +0 -142
  35. rag-quest-hub/api/upload.js +0 -171
  36. rag-quest-hub/src/App.tsx +2 -0
  37. rag-quest-hub/src/components/ChatInterface.tsx +5 -3
  38. rag-quest-hub/src/components/ConnectionStatus.tsx +0 -283
  39. rag-quest-hub/src/components/ServiceMonitor.tsx +0 -364
  40. rag-quest-hub/src/components/SimpleConnectionStatus.tsx +86 -0
  41. rag-quest-hub/src/pages/Dashboard.tsx +2 -6
  42. rag-quest-hub/src/test/connection-status.integration.test.tsx +0 -133
  43. rag-quest-hub/src/test/docker.integration.test.ts +0 -378
  44. rag-quest-hub/src/test/health-check-demo.ts +0 -86
  45. rag-quest-hub/src/test/health-check.test.ts +0 -226
  46. rag-quest-hub/vercel.json +4 -39
  47. railway-database-config.py +0 -101
  48. railway-health-check.sh +0 -318
  49. railway.json +1 -1
  50. requirements-railway.txt +20 -0
.env.cloudrun.template DELETED
@@ -1,62 +0,0 @@
1
- # Cloud Run Environment Variables Template
2
- # Copy this file to .env.cloudrun and fill in the values for Cloud Run deployment
3
-
4
- # Google Cloud Project Configuration
5
- PROJECT_ID=your-gcp-project-id
6
- REGION=us-central1
7
-
8
- # JWT Configuration (REQUIRED - Generate a secure secret)
9
- JWT_SECRET=your-super-secure-jwt-secret-key-change-this-in-production
10
- JWT_LIFETIME_SECONDS=3600
11
-
12
- # User Registration Settings
13
- USER_REGISTRATION_ENABLED=true
14
- EMAIL_VERIFICATION_REQUIRED=false
15
-
16
- # Frontend Configuration (will be updated with actual Cloud Run URLs)
17
- VITE_API_BASE_URL=https://knowledge-assistant-backend-HASH-uc.a.run.app
18
- VITE_API_TIMEOUT=30000
19
- VITE_ENABLE_REGISTRATION=true
20
-
21
- # CORS Configuration (will be updated with actual Cloud Run URLs)
22
- CORS_ORIGINS=https://knowledge-assistant-frontend-HASH-uc.a.run.app
23
-
24
- # Google Gemini API Configuration (replaces Ollama)
25
- GEMINI_API_KEY=your-gemini-api-key-here
26
- GEMINI_MODEL=gemini-1.5-flash
27
-
28
- # Database Configuration (Cloud SQL PostgreSQL)
29
- DATABASE_URL=postgresql+asyncpg://knowledge-assistant-user:PASSWORD@/knowledge-assistant-main-db?host=/cloudsql/PROJECT_ID:REGION:knowledge-assistant-db
30
-
31
- # Qdrant Configuration (Cloud Run service)
32
- QDRANT_HOST=https://knowledge-assistant-qdrant-HASH-uc.a.run.app
33
- QDRANT_PORT=443
34
-
35
- # Python Configuration
36
- PYTHONUNBUFFERED=1
37
- PYTHONDONTWRITEBYTECODE=1
38
-
39
- # Cloud SQL Instance Connection
40
- CLOUD_SQL_CONNECTION_NAME=PROJECT_ID:REGION:knowledge-assistant-db
41
-
42
- # Service Account Emails
43
- BACKEND_SERVICE_ACCOUNT=knowledge-assistant-backend-sa@PROJECT_ID.iam.gserviceaccount.com
44
- QDRANT_SERVICE_ACCOUNT=knowledge-assistant-qdrant-sa@PROJECT_ID.iam.gserviceaccount.com
45
-
46
- # Resource Configuration
47
- BACKEND_MEMORY=1Gi
48
- BACKEND_CPU=1000m
49
- FRONTEND_MEMORY=512Mi
50
- FRONTEND_CPU=1000m
51
- QDRANT_MEMORY=512Mi
52
- QDRANT_CPU=1000m
53
-
54
- # Scaling Configuration
55
- MAX_INSTANCES=10
56
- MIN_INSTANCES=0
57
- QDRANT_MIN_INSTANCES=1
58
-
59
- # Security Configuration
60
- REQUIRE_AUTHENTICATION=false
61
- ENABLE_CORS=true
62
- SECURE_COOKIES=true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore CHANGED
@@ -282,4 +282,218 @@ test-reports/
282
  # .kiro/
283
 
284
  # Docker build context files that shouldn't be included
285
- .dockerignore
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  # .kiro/
283
 
284
  # Docker build context files that shouldn't be included
285
+ .dockerignore
286
+ # =
287
+ # ==========================================
288
+ # DEPLOYMENT & SECURITY - CRITICAL ADDITIONS
289
+ # ===========================================
290
+
291
+ # Railway Environment Files (CRITICAL - Contains API keys and secrets)
292
+ .env.railway
293
+ .env.railway.local
294
+ railway-*.env
295
+
296
+ # Vercel Environment Files (CRITICAL - Contains API keys)
297
+ .env.vercel
298
+ .env.vercel.local
299
+ .vercel/
300
+
301
+ # All environment files with secrets (CRITICAL)
302
+ .env.*
303
+ !.env.example
304
+ !.env.*.template
305
+
306
+ # API Keys and Secrets (CRITICAL)
307
+ *api_key*
308
+ *secret*
309
+ *token*
310
+ *.key
311
+ *.pem
312
+ *.p12
313
+ *.pfx
314
+
315
+ # Database URLs and Connection Strings (CRITICAL)
316
+ *DATABASE_URL*
317
+ *DB_URL*
318
+
319
+ # JWT Secrets (CRITICAL)
320
+ *JWT_SECRET*
321
+ *jwt_secret*
322
+
323
+ # ===========================================
324
+ # DEPLOYMENT ARTIFACTS
325
+ # ===========================================
326
+
327
+ # Railway deployment artifacts
328
+ .railway/
329
+ railway.json.local
330
+ railway-*.log
331
+
332
+ # Vercel deployment artifacts
333
+ .vercel/
334
+ vercel.json.local
335
+
336
+ # Build artifacts
337
+ dist/
338
+ build/
339
+ .next/
340
+ .nuxt/
341
+ out/
342
+
343
+ # ===========================================
344
+ # DEVELOPMENT & TESTING
345
+ # ===========================================
346
+
347
+ # Local development databases
348
+ *.db
349
+ *.sqlite*
350
+ knowledge_assistant*.db
351
+ test*.db
352
+
353
+ # Test coverage and reports
354
+ coverage/
355
+ .coverage
356
+ htmlcov/
357
+ test-results/
358
+ .pytest_cache/
359
+
360
+ # Local configuration overrides
361
+ docker-compose.override.yml
362
+ docker-compose.local.yml
363
+
364
+ # ===========================================
365
+ # SENSITIVE DATA & UPLOADS
366
+ # ===========================================
367
+
368
+ # User uploaded documents (if stored locally)
369
+ uploads/
370
+ documents/
371
+ user_documents/
372
+
373
+ # Vector store data
374
+ vector_store/
375
+ qdrant_storage/
376
+ embeddings/
377
+
378
+ # Backup files with potential sensitive data
379
+ backups/
380
+ *.backup
381
+ *.dump
382
+ *.sql
383
+
384
+ # ===========================================
385
+ # SYSTEM & CACHE FILES
386
+ # ===========================================
387
+
388
+ # macOS
389
+ .DS_Store
390
+ .AppleDouble
391
+ .LSOverride
392
+
393
+ # Windows
394
+ Thumbs.db
395
+ ehthumbs.db
396
+ Desktop.ini
397
+
398
+ # Linux
399
+ *~
400
+ .nfs*
401
+
402
+ # Temporary files
403
+ *.tmp
404
+ *.temp
405
+ .tmp/
406
+ .temp/
407
+
408
+ # ===========================================
409
+ # IDE & EDITOR SPECIFIC
410
+ # ===========================================
411
+
412
+ # VSCode
413
+ .vscode/settings.json
414
+ .vscode/launch.json
415
+ .vscode/extensions.json
416
+ !.vscode/settings.json.example
417
+
418
+ # JetBrains IDEs
419
+ .idea/
420
+ *.iml
421
+ *.ipr
422
+ *.iws
423
+
424
+ # Vim
425
+ *.swp
426
+ *.swo
427
+ *~
428
+
429
+ # Emacs
430
+ *~
431
+ \#*\#
432
+ /.emacs.desktop
433
+ /.emacs.desktop.lock
434
+ *.elc
435
+
436
+ # ===========================================
437
+ # LOGS & MONITORING
438
+ # ===========================================
439
+
440
+ # Application logs
441
+ logs/
442
+ *.log
443
+ npm-debug.log*
444
+ yarn-debug.log*
445
+ yarn-error.log*
446
+ pnpm-debug.log*
447
+
448
+ # System logs
449
+ /var/log/
450
+ syslog
451
+
452
+ # ===========================================
453
+ # PACKAGE MANAGERS & DEPENDENCIES
454
+ # ===========================================
455
+
456
+ # Node.js
457
+ node_modules/
458
+ npm-debug.log*
459
+ yarn-debug.log*
460
+ yarn-error.log*
461
+ .pnpm-debug.log*
462
+
463
+ # Python
464
+ __pycache__/
465
+ *.py[cod]
466
+ *$py.class
467
+ .Python
468
+ pip-log.txt
469
+ pip-delete-this-directory.txt
470
+
471
+ # ===========================================
472
+ # DOCKER & CONTAINERS
473
+ # ===========================================
474
+
475
+ # Docker volumes and persistent data
476
+ docker-data/
477
+ postgres-data/
478
+ qdrant-data/
479
+ ollama-data/
480
+ redis-data/
481
+
482
+ # Docker override files
483
+ docker-compose.override.yml
484
+ docker-compose.*.yml
485
+ !docker-compose.yml
486
+ !docker-compose.prod.yml
487
+
488
+ # ===========================================
489
+ # SECURITY SCANNING & COMPLIANCE
490
+ # ===========================================
491
+
492
+ # Security scan results
493
+ security-scan-results/
494
+ vulnerability-reports/
495
+ .snyk
496
+
497
+ # Compliance reports
498
+ compliance-reports/
499
+ audit-logs/
CLOUDRUN_DEPLOYMENT.md DELETED
@@ -1,350 +0,0 @@
1
- # Google Cloud Run Deployment Guide
2
-
3
- This guide provides comprehensive instructions for deploying the Knowledge Assistant RAG application to Google Cloud Run.
4
-
5
- ## Overview
6
-
7
- The Knowledge Assistant application is deployed as three separate Cloud Run services:
8
- - **Frontend**: React application served by nginx
9
- - **Backend**: FastAPI application with database and AI integrations
10
- - **Qdrant**: Vector database for document embeddings
11
-
12
- ## Prerequisites
13
-
14
- ### Required Tools
15
- - [Google Cloud SDK (gcloud)](https://cloud.google.com/sdk/docs/install)
16
- - [Docker](https://docs.docker.com/get-docker/)
17
- - [Git](https://git-scm.com/downloads)
18
-
19
- ### Google Cloud Setup
20
- 1. Create a Google Cloud Project
21
- 2. Enable billing for your project
22
- 3. Install and initialize gcloud CLI:
23
- ```bash
24
- gcloud init
25
- gcloud auth login
26
- ```
27
-
28
- ### API Keys Required
29
- - **Google Gemini API Key**: Get from [Google AI Studio](https://aistudio.google.com/app/apikey)
30
-
31
- ## Quick Start
32
-
33
- ### 1. Clone and Setup
34
- ```bash
35
- git clone <your-repo-url>
36
- cd Knowledge_Assistant_RAG
37
- ```
38
-
39
- ### 2. Create Environment Configuration
40
- ```bash
41
- # Create environment file
42
- ./scripts/cloudrun-env-setup.sh create
43
-
44
- # This will prompt you for:
45
- # - Google Cloud Project ID
46
- # - Google Gemini API Key
47
- ```
48
-
49
- ### 3. Deploy to Cloud Run
50
- ```bash
51
- # Run the complete deployment
52
- ./deploy-cloudrun.sh
53
-
54
- # Or run individual steps:
55
- ./deploy-cloudrun.sh secrets # Create secrets only
56
- ./deploy-cloudrun.sh build # Build and push images only
57
- ./deploy-cloudrun.sh deploy # Deploy services only
58
- ```
59
-
60
- ### 4. Verify Deployment
61
- ```bash
62
- # Run health checks
63
- ./scripts/cloudrun-health-check.sh
64
-
65
- # Quick check
66
- ./scripts/cloudrun-health-check.sh quick
67
- ```
68
-
69
- ## Detailed Deployment Steps
70
-
71
- ### Step 1: Environment Configuration
72
-
73
- Create your environment file:
74
- ```bash
75
- ./scripts/cloudrun-env-setup.sh create .env.cloudrun
76
- ```
77
-
78
- Review and modify the generated `.env.cloudrun` file as needed:
79
- ```bash
80
- # Key variables to verify:
81
- PROJECT_ID=your-gcp-project-id
82
- GEMINI_API_KEY=your-gemini-api-key
83
- JWT_SECRET=auto-generated-secure-secret
84
- ```
85
-
86
- ### Step 2: Google Cloud Setup
87
-
88
- The deployment script will automatically:
89
- - Enable required APIs
90
- - Create service accounts
91
- - Set up IAM permissions
92
- - Create Cloud SQL instance
93
- - Configure Secret Manager
94
-
95
- ### Step 3: Build and Deploy
96
-
97
- The deployment process includes:
98
-
99
- 1. **Build Docker Images**
100
- - Backend: Multi-stage Python Alpine build
101
- - Frontend: Multi-stage Node.js with nginx
102
-
103
- 2. **Create Cloud Infrastructure**
104
- - Cloud SQL PostgreSQL instance (free tier)
105
- - Secret Manager for sensitive data
106
- - Service accounts with minimal permissions
107
-
108
- 3. **Deploy Services**
109
- - Qdrant vector database
110
- - Backend API with database connection
111
- - Frontend with proper API configuration
112
-
113
- ### Step 4: Post-Deployment Configuration
114
-
115
- After deployment, update service URLs:
116
- ```bash
117
- ./scripts/cloudrun-env-setup.sh update-urls .env.cloudrun
118
- ```
119
-
120
- ## Service Configuration
121
-
122
- ### Resource Limits (Free Tier Optimized)
123
-
124
- | Service | Memory | CPU | Min Instances | Max Instances |
125
- |---------|--------|-----|---------------|---------------|
126
- | Frontend | 512Mi | 1000m | 0 | 10 |
127
- | Backend | 1Gi | 1000m | 0 | 10 |
128
- | Qdrant | 512Mi | 1000m | 1 | 5 |
129
-
130
- ### Environment Variables
131
-
132
- #### Frontend
133
- - `VITE_API_BASE_URL`: Backend service URL
134
- - `VITE_API_TIMEOUT`: API request timeout
135
- - `VITE_ENABLE_REGISTRATION`: Enable user registration
136
-
137
- #### Backend
138
- - `DATABASE_URL`: Cloud SQL connection string (from Secret Manager)
139
- - `JWT_SECRET`: JWT signing secret (from Secret Manager)
140
- - `GEMINI_API_KEY`: Google Gemini API key (from Secret Manager)
141
- - `QDRANT_HOST`: Qdrant service URL
142
- - `CORS_ORIGINS`: Allowed frontend origins
143
-
144
- #### Qdrant
145
- - `QDRANT__SERVICE__HTTP_PORT`: HTTP port (6333)
146
- - `QDRANT__SERVICE__GRPC_PORT`: gRPC port (6334)
147
-
148
- ## Security Configuration
149
-
150
- ### Service Accounts
151
- - **Backend Service Account**: Access to Cloud SQL and Secret Manager
152
- - **Qdrant Service Account**: Basic Cloud Run permissions
153
-
154
- ### IAM Roles
155
- - `roles/cloudsql.client`: Cloud SQL access
156
- - `roles/secretmanager.secretAccessor`: Secret Manager access
157
- - `roles/run.invoker`: Service-to-service communication
158
-
159
- ### Secrets Management
160
- All sensitive data is stored in Google Secret Manager:
161
- - JWT signing secret
162
- - Database connection string
163
- - API keys
164
-
165
- ## Monitoring and Maintenance
166
-
167
- ### Health Checks
168
- ```bash
169
- # Comprehensive health check
170
- ./scripts/cloudrun-health-check.sh comprehensive
171
-
172
- # Quick status check
173
- ./scripts/cloudrun-health-check.sh quick
174
-
175
- # Check specific service logs
176
- ./scripts/cloudrun-health-check.sh logs knowledge-assistant-backend 100
177
- ```
178
-
179
- ### Viewing Logs
180
- ```bash
181
- # Backend logs
182
- gcloud logging read "resource.type=\"cloud_run_revision\" AND resource.labels.service_name=\"knowledge-assistant-backend\"" --limit=50
183
-
184
- # Frontend logs
185
- gcloud logging read "resource.type=\"cloud_run_revision\" AND resource.labels.service_name=\"knowledge-assistant-frontend\"" --limit=50
186
- ```
187
-
188
- ### Scaling Configuration
189
- Services auto-scale based on traffic:
190
- - **Scale to zero**: When no requests (saves costs)
191
- - **Auto-scale up**: Based on request concurrency and CPU utilization
192
- - **Max instances**: Prevents runaway costs
193
-
194
- ## Cost Optimization
195
-
196
- ### Free Tier Limits
197
- - **Cloud Run**: 2 million requests/month, 360,000 GiB-seconds of memory and 180,000 vCPU-seconds/month
198
- - **Cloud SQL**: db-f1-micro instance, 10GB storage
199
- - **Secret Manager**: 6 active secret versions
200
-
201
- ### Cost-Saving Features
202
- - Scale-to-zero for frontend and backend
203
- - Minimal resource allocation
204
- - Efficient container images
205
- - Request-based billing
206
-
207
- ## Troubleshooting
208
-
209
- ### Common Issues
210
-
211
- #### 1. Build Failures
212
- ```bash
213
- # Check build logs
214
- gcloud builds log <BUILD_ID>
215
-
216
- # Common fixes:
217
- # - Increase build timeout
218
- # - Check Dockerfile syntax
219
- # - Verify base image availability
220
- ```
221
-
222
- #### 2. Service Not Starting
223
- ```bash
224
- # Check service logs
225
- gcloud logging read "resource.type=\"cloud_run_revision\" AND resource.labels.service_name=\"SERVICE_NAME\"" --limit=20
226
-
227
- # Common fixes:
228
- # - Check environment variables
229
- # - Verify secret access
230
- # - Check resource limits
231
- ```
232
-
233
- #### 3. Database Connection Issues
234
- ```bash
235
- # Test Cloud SQL connection
236
- gcloud sql connect knowledge-assistant-db --user=knowledge-assistant-user
237
-
238
- # Common fixes:
239
- # - Check service account permissions
240
- # - Verify Cloud SQL instance is running
241
- # - Check connection string format
242
- ```
243
-
244
- #### 4. Service Communication Issues
245
- ```bash
246
- # Check CORS configuration
247
- curl -X OPTIONS -H "Origin: https://your-frontend-url" https://your-backend-url/health
248
-
249
- # Common fixes:
250
- # - Update CORS_ORIGINS environment variable
251
- # - Check service URLs in frontend configuration
252
- # - Verify IAM permissions for service-to-service calls
253
- ```
254
-
255
- ### Debug Commands
256
- ```bash
257
- # Get service details
258
- gcloud run services describe SERVICE_NAME --region=us-central1
259
-
260
- # Check recent deployments
261
- gcloud run revisions list --service=SERVICE_NAME --region=us-central1
262
-
263
- # View service configuration
264
- gcloud run services describe SERVICE_NAME --region=us-central1 --format=yaml
265
- ```
266
-
267
- ## Updating the Application
268
-
269
- ### Code Updates
270
- ```bash
271
- # Rebuild and redeploy
272
- ./deploy-cloudrun.sh build
273
- ./deploy-cloudrun.sh deploy
274
- ```
275
-
276
- ### Configuration Updates
277
- ```bash
278
- # Update environment variables
279
- gcloud run services update SERVICE_NAME --region=us-central1 --set-env-vars="KEY=VALUE"
280
-
281
- # Update secrets
282
- ./scripts/cloudrun-env-setup.sh create-secrets .env.cloudrun
283
- ```
284
-
285
- ### Database Migrations
286
- ```bash
287
- # Connect to Cloud SQL
288
- gcloud sql connect knowledge-assistant-db --user=knowledge-assistant-user
289
-
290
- # Run migrations (if using Alembic)
291
- # This would be handled automatically by the backend service on startup
292
- ```
293
-
294
- ## Cleanup
295
-
296
- ### Remove All Resources
297
- ```bash
298
- # Delete Cloud Run services
299
- gcloud run services delete knowledge-assistant-frontend --region=us-central1
300
- gcloud run services delete knowledge-assistant-backend --region=us-central1
301
- gcloud run services delete knowledge-assistant-qdrant --region=us-central1
302
-
303
- # Delete Cloud SQL instance
304
- gcloud sql instances delete knowledge-assistant-db
305
-
306
- # Delete secrets
307
- gcloud secrets delete knowledge-assistant-secrets
308
-
309
- # Delete service accounts
310
- gcloud iam service-accounts delete knowledge-assistant-backend-sa@PROJECT_ID.iam.gserviceaccount.com
311
- gcloud iam service-accounts delete knowledge-assistant-qdrant-sa@PROJECT_ID.iam.gserviceaccount.com
312
- ```
313
-
314
- ## Support
315
-
316
- ### Getting Help
317
- - Check the [troubleshooting section](#troubleshooting) above
318
- - Review Cloud Run logs for error messages
319
- - Verify all prerequisites are met
320
- - Ensure API quotas are not exceeded
321
-
322
- ### Useful Resources
323
- - [Google Cloud Run Documentation](https://cloud.google.com/run/docs)
324
- - [Cloud SQL Documentation](https://cloud.google.com/sql/docs)
325
- - [Secret Manager Documentation](https://cloud.google.com/secret-manager/docs)
326
- - [Google Gemini API Documentation](https://ai.google.dev/docs)
327
-
328
- ## Architecture Diagram
329
-
330
- ```
331
- ┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
332
- │    Frontend     │    │     Backend     │    │     Qdrant      │
333
- │   (Cloud Run)   │────│   (Cloud Run)   │────│   (Cloud Run)   │
334
- │                 │    │                 │    │                 │
335
- │  React + nginx  │    │  FastAPI + DB   │    │ Vector Database │
336
- └─────────────────┘    └─────────────────┘    └─────────────────┘
337
-
338
-
339
-                        ┌─────────────────┐
340
-                        │    Cloud SQL    │
341
-                        │  (PostgreSQL)   │
342
-                        └─────────────────┘
343
-
344
-                        ┌─────────────────┐
345
-                        │ Secret Manager  │
346
-                        │    (Secrets)    │
347
-                        └─────────────────┘
348
- ```
349
-
350
- This deployment provides a scalable, cost-effective solution for running the Knowledge Assistant RAG application on Google Cloud Platform's free tier.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
DEPLOYMENT_AUTOMATION.md DELETED
@@ -1,297 +0,0 @@
1
- # Deployment Automation Scripts
2
-
3
- This document describes the deployment automation scripts created for the Knowledge Assistant RAG application.
4
-
5
- ## Overview
6
-
7
- The deployment automation system provides a comprehensive set of tools for deploying, monitoring, and maintaining the Knowledge Assistant RAG application across multiple platforms.
8
-
9
- ## Scripts
10
-
11
- ### 1. Master Deployment Script (`deploy.sh`)
12
-
13
- The main deployment script that provides an interactive interface for deploying to various platforms.
14
-
15
- **Usage:**
16
- ```bash
17
- ./deploy.sh [OPTIONS] [PLATFORM]
18
- ```
19
-
20
- **Platforms:**
21
- - `railway` - Deploy to Railway.app (free tier)
22
- - `fly` - Deploy to Fly.io (free tier)
23
- - `cloudrun` - Deploy to Google Cloud Run
24
- - `vercel` - Deploy to Vercel (hybrid deployment)
25
- - `local` - Deploy locally with Docker
26
-
27
- **Key Features:**
28
- - Interactive platform selection
29
- - Pre-deployment validation
30
- - Environment configuration checking
31
- - Automated prerequisite verification
32
- - Rollback capabilities
33
- - Dry-run mode for testing
34
-
35
- **Examples:**
36
- ```bash
37
- # Interactive deployment
38
- ./deploy.sh
39
-
40
- # Deploy to Railway
41
- ./deploy.sh railway
42
-
43
- # Validate prerequisites only
44
- ./deploy.sh --validate-only
45
-
46
- # Show deployment plan without executing
47
- ./deploy.sh cloudrun --dry-run
48
-
49
- # Deploy only backend services
50
- ./deploy.sh railway --backend-only
51
- ```
52
-
53
- ### 2. Deployment Utilities (`scripts/deployment-utils.sh`)
54
-
55
- A library of common deployment functions and utilities used by other scripts.
56
-
57
- **Key Functions:**
58
- - `generate_jwt_secret()` - Generate secure JWT secrets
59
- - `wait_for_service()` - Wait for services to become ready
60
- - `check_service_health()` - Perform health checks
61
- - `backup_sqlite_database()` - Create database backups
62
- - `validate_env_file()` - Validate environment configurations
63
- - `cleanup_docker_images()` - Clean up old Docker images
64
-
65
- ### 3. Health Check Script (`scripts/health-check.sh`)
66
-
67
- Comprehensive health monitoring for all application services.
68
-
69
- **Usage:**
70
- ```bash
71
- ./scripts/health-check.sh [OPTIONS]
72
- ```
73
-
74
- **Features:**
75
- - Service health monitoring
76
- - Database connectivity checks
77
- - External API validation
78
- - System resource monitoring
79
- - Docker container status
80
- - Detailed health reports
81
-
82
- **Examples:**
83
- ```bash
84
- # Check all services
85
- ./scripts/health-check.sh
86
-
87
- # Check specific platform
88
- ./scripts/health-check.sh --platform railway
89
-
90
- # Save report to file
91
- ./scripts/health-check.sh --output health-report.txt
92
-
93
- # Skip external API checks
94
- ./scripts/health-check.sh --no-external
95
- ```
96
-
97
- ### 4. Deployment Validation (`scripts/validate-deployment.sh`)
98
-
99
- End-to-end functional testing of deployed applications.
100
-
101
- **Usage:**
102
- ```bash
103
- ./scripts/validate-deployment.sh [OPTIONS]
104
- ```
105
-
106
- **Test Coverage:**
107
- - User registration and authentication
108
- - Document upload functionality
109
- - Query processing
110
- - API documentation accessibility
111
- - Database connectivity
112
- - Performance testing
113
-
114
- **Examples:**
115
- ```bash
116
- # Validate local deployment
117
- ./scripts/validate-deployment.sh
118
-
119
- # Validate specific URLs
120
- ./scripts/validate-deployment.sh \
121
- --backend-url https://api.example.com \
122
- --frontend-url https://app.example.com
123
-
124
- # Skip functional tests
125
- ./scripts/validate-deployment.sh --skip-functional
126
- ```
127
-
128
- ### 5. Database Migration (`scripts/migrate-database.sh`)
129
-
130
- Database migration and maintenance utilities.
131
-
132
- **Usage:**
133
- ```bash
134
- ./scripts/migrate-database.sh ACTION [OPTIONS]
135
- ```
136
-
137
- **Actions:**
138
- - `init` - Initialize database with migrations
139
- - `migrate` - Run pending migrations
140
- - `rollback` - Rollback migrations
141
- - `status` - Show migration status
142
- - `backup` - Create database backup
143
- - `reset` - Reset database (DANGEROUS)
144
-
145
- **Examples:**
146
- ```bash
147
- # Initialize database
148
- ./scripts/migrate-database.sh init
149
-
150
- # Run migrations
151
- ./scripts/migrate-database.sh migrate
152
-
153
- # Create backup
154
- ./scripts/migrate-database.sh backup
155
-
156
- # Check status
157
- ./scripts/migrate-database.sh status
158
- ```
159
-
160
- ## Workflow
161
-
162
- ### Typical Deployment Workflow
163
-
164
- 1. **Preparation**
165
- ```bash
166
- # Validate prerequisites
167
- ./deploy.sh --validate-only
168
- ```
169
-
170
- 2. **Deployment**
171
- ```bash
172
- # Deploy to chosen platform
173
- ./deploy.sh railway
174
- ```
175
-
176
- 3. **Validation**
177
- ```bash
178
- # Run health checks
179
- ./scripts/health-check.sh --platform railway
180
-
181
- # Validate functionality
182
- ./scripts/validate-deployment.sh
183
- ```
184
-
185
- 4. **Monitoring**
186
- ```bash
187
- # Continuous health monitoring
188
- ./scripts/health-check.sh --output daily-health.txt
189
- ```
190
-
191
- ### Database Management Workflow
192
-
193
- 1. **Backup**
194
- ```bash
195
- ./scripts/migrate-database.sh backup
196
- ```
197
-
198
- 2. **Migration**
199
- ```bash
200
- ./scripts/migrate-database.sh migrate
201
- ```
202
-
203
- 3. **Validation**
204
- ```bash
205
- ./scripts/migrate-database.sh status
206
- ```
207
-
208
- ## Environment Configuration
209
-
210
- Each platform requires specific environment configuration:
211
-
212
- - **Railway**: `.env.railway`
213
- - **Fly.io**: `.env.fly`
214
- - **Cloud Run**: `.env.cloudrun`
215
- - **Vercel**: `.env.vercel`
216
- - **Local**: `.env.production`
217
-
218
- The scripts will automatically create these files from templates if they don't exist.
219
-
220
- ## Error Handling and Rollback
221
-
222
- All scripts include comprehensive error handling:
223
-
224
- - **Automatic Rollback**: Failed deployments can be automatically rolled back
225
- - **Backup Creation**: Databases are backed up before migrations
226
- - **Health Monitoring**: Continuous monitoring detects issues early
227
- - **Detailed Logging**: All operations are logged with timestamps
228
-
229
- ## Security Features
230
-
231
- - **JWT Secret Validation**: Ensures secure authentication tokens
232
- - **Environment Validation**: Prevents deployment with insecure configurations
233
- - **Secret Management**: Proper handling of sensitive information
234
- - **Access Control**: Platform-specific authentication requirements
235
-
236
- ## Monitoring and Maintenance
237
-
238
- ### Daily Operations
239
- ```bash
240
- # Daily health check
241
- ./scripts/health-check.sh --output logs/health-$(date +%Y%m%d).txt
242
-
243
- # Weekly validation
244
- ./scripts/validate-deployment.sh --output logs/validation-$(date +%Y%m%d).txt
245
- ```
246
-
247
- ### Maintenance Tasks
248
- ```bash
249
- # Clean up old Docker images
250
- source scripts/deployment-utils.sh && cleanup_docker_images
251
-
252
- # Database backup
253
- ./scripts/migrate-database.sh backup
254
-
255
- # System resource check
256
- ./scripts/health-check.sh | grep -E "(Memory|Disk|CPU)"
257
- ```
258
-
259
- ## Troubleshooting
260
-
261
- ### Common Issues
262
-
263
- 1. **Prerequisites Missing**
264
- - Run `./deploy.sh --validate-only` to check requirements
265
- - Install missing CLI tools as indicated
266
-
267
- 2. **Environment Configuration**
268
- - Check environment files exist and have correct values
269
- - Validate JWT secrets are secure (32+ characters)
270
-
271
- 3. **Service Health Issues**
272
- - Use `./scripts/health-check.sh` to identify problems
273
- - Check logs for specific error messages
274
-
275
- 4. **Database Problems**
276
- - Use `./scripts/migrate-database.sh status` to check migrations
277
- - Create backups before making changes
278
-
279
- ### Getting Help
280
-
281
- Each script includes detailed help information:
282
- ```bash
283
- ./deploy.sh --help
284
- ./scripts/health-check.sh --help
285
- ./scripts/validate-deployment.sh --help
286
- ./scripts/migrate-database.sh --help
287
- ```
288
-
289
- ## Integration with Existing Scripts
290
-
291
- The automation scripts integrate with existing platform-specific deployment scripts:
292
-
293
- - `deploy-railway.sh` - Railway deployment
294
- - `deploy-cloudrun.sh` - Google Cloud Run deployment
295
- - `deploy-production.sh` - Local Docker deployment
296
-
297
- The master script (`deploy.sh`) orchestrates these existing scripts while adding validation, monitoring, and error handling capabilities.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
DOCUMENTATION_INDEX.md DELETED
@@ -1,239 +0,0 @@
1
- # Documentation Index
2
-
3
- This document provides an overview of all available documentation for the Knowledge Assistant RAG application deployment and maintenance.
4
-
5
- ## 📚 Documentation Overview
6
-
7
- The Knowledge Assistant RAG application includes comprehensive documentation covering deployment, configuration, troubleshooting, and maintenance across multiple platforms.
8
-
9
- ## 🚀 Deployment Guides
10
-
11
- ### Platform-Specific Deployment
12
- - **[Railway Deployment Guide](RAILWAY_DEPLOYMENT.md)** - Deploy to Railway.app (free tier: 512MB RAM, 1GB storage)
13
- - **[Fly.io Deployment Guide](FLY_DEPLOYMENT.md)** - Deploy to Fly.io (free tier: 256MB RAM, 1GB storage)
14
- - **[Google Cloud Run Deployment Guide](CLOUDRUN_DEPLOYMENT.md)** - Deploy to Google Cloud Run (free tier: 1GB memory, 2 vCPU)
15
- - **[Deployment Automation](DEPLOYMENT_AUTOMATION.md)** - Automated deployment scripts and utilities
16
-
17
- ### Quick Start
18
- 1. Choose your preferred platform from the guides above
19
- 2. Follow the platform-specific prerequisites
20
- 3. Run the deployment script: `./deploy.sh platform-name`
21
- 4. Configure environment variables as documented
22
-
23
- ## ⚙️ Configuration
24
-
25
- ### Environment Setup
26
- - **[Environment Configuration Guide](ENVIRONMENT_CONFIGURATION.md)** - Comprehensive guide for environment variables and secrets management
27
- - Core environment variables
28
- - Platform-specific configuration
29
- - Secrets management best practices
30
- - Validation and testing scripts
31
-
32
- ### Key Configuration Files
33
- - `.env.railway` - Railway deployment configuration
34
- - `.env.fly` - Fly.io deployment configuration
35
- - `.env.cloudrun` - Google Cloud Run configuration
36
- - `.env.vercel` - Vercel hybrid deployment configuration
37
-
38
- ## 🔧 Troubleshooting and Maintenance
39
-
40
- ### Problem Resolution
41
- - **[Troubleshooting Guide](TROUBLESHOOTING.md)** - Comprehensive troubleshooting for common issues
42
- - Common deployment issues
43
- - Platform-specific problems
44
- - Service communication issues
45
- - Database problems
46
- - Emergency recovery procedures
47
-
48
- ### Performance and Optimization
49
- - **[Performance Optimization Guide](PERFORMANCE_OPTIMIZATION.md)** - Strategies for optimizing performance and scaling
50
- - Container optimization
51
- - Database performance tuning
52
- - API optimization
53
- - Scaling strategies
54
- - Cost optimization
55
-
56
- ### Frequently Asked Questions
57
- - **[FAQ](FAQ.md)** - Answers to common questions about deployment, configuration, and maintenance
58
- - General questions
59
- - Deployment questions
60
- - Configuration questions
61
- - Performance questions
62
- - Security questions
63
- - Cost and scaling questions
64
-
65
- ## 📋 Quick Reference
66
-
67
- ### Essential Commands
68
-
69
- #### Deployment
70
- ```bash
71
- # Deploy to Railway
72
- ./deploy.sh railway
73
-
74
- # Deploy to Fly.io
75
- ./deploy.sh fly
76
-
77
- # Deploy to Google Cloud Run
78
- ./deploy.sh cloudrun
79
-
80
- # Deploy locally
81
- ./deploy.sh local
82
- ```
83
-
84
- #### Health Checks
85
- ```bash
86
- # Run comprehensive health check
87
- ./scripts/health-check.sh
88
-
89
- # Validate deployment
90
- ./scripts/validate-deployment.sh
91
-
92
- # Check environment variables
93
- ./scripts/validate-environment.sh
94
- ```
95
-
96
- #### Maintenance
97
- ```bash
98
- # Database backup
99
- ./scripts/migrate-database.sh backup
100
-
101
- # Performance monitoring
102
- ./scripts/performance-report.sh
103
-
104
- # Clean up resources
105
- docker system prune -a
106
- ```
107
-
108
- ### Environment Variables Quick Reference
109
-
110
- #### Required Variables
111
- ```bash
112
- JWT_SECRET=your-32-character-minimum-secret
113
- GEMINI_API_KEY=your-google-gemini-api-key
114
- DATABASE_URL=sqlite+aiosqlite:///./data/knowledge_assistant.db
115
- ```
116
-
117
- #### Optional Variables
118
- ```bash
119
- QDRANT_CLOUD_URL=https://your-cluster.qdrant.io
120
- QDRANT_API_KEY=your-qdrant-api-key
121
- CORS_ORIGINS=https://your-domain.com
122
- USER_REGISTRATION_ENABLED=true
123
- ```
124
-
125
- ### Platform Resource Limits
126
-
127
- | Platform | Memory | Storage | CPU | Cost |
128
- |----------|--------|---------|-----|------|
129
- | Railway | 512MB | 1GB | Shared | Free |
130
- | Fly.io | 256MB | 1GB | Shared | Free |
131
- | Cloud Run | 1GB | N/A | 1 vCPU | Free tier |
132
- | Vercel | N/A | N/A | Serverless | Free |
133
-
134
- ## 🆘 Getting Help
135
-
136
- ### Documentation Hierarchy
137
- 1. **Start with FAQ** - Check if your question is already answered
138
- 2. **Platform-specific guides** - For deployment issues
139
- 3. **Troubleshooting guide** - For runtime problems
140
- 4. **Environment configuration** - For setup issues
141
- 5. **Performance guide** - For optimization needs
142
-
143
- ### Support Channels
144
- - **Platform Documentation**: Check official platform docs
145
- - **Community Forums**: Platform-specific Discord/forums
146
- - **Issue Tracking**: Create detailed bug reports with logs
147
- - **Performance Issues**: Use monitoring tools and guides
148
-
149
- ### Diagnostic Information
150
- When seeking help, include:
151
- - Platform and deployment method
152
- - Error messages and logs
153
- - Environment configuration (without secrets)
154
- - Steps to reproduce the issue
155
-
156
- ## 📈 Monitoring and Maintenance
157
-
158
- ### Regular Tasks
159
- - **Daily**: Health checks and log monitoring
160
- - **Weekly**: Performance reviews and cleanup
161
- - **Monthly**: Security updates and backup verification
162
-
163
- ### Key Metrics to Monitor
164
- - Response times (< 200ms target)
165
- - Memory usage (stay within platform limits)
166
- - Error rates (< 1% target)
167
- - Disk usage (monitor growth)
168
-
169
- ### Alerting Setup
170
- Configure alerts for:
171
- - Service downtime
172
- - High error rates
173
- - Resource limit approaching
174
- - Failed deployments
175
-
176
- ## 🔄 Updates and Maintenance
177
-
178
- ### Updating the Application
179
- 1. **Test locally** with new changes
180
- 2. **Backup data** before deployment
181
- 3. **Deploy to staging** (if available)
182
- 4. **Deploy to production** using deployment scripts
183
- 5. **Verify functionality** with health checks
184
-
185
- ### Security Maintenance
186
- - Rotate JWT secrets quarterly
187
- - Update API keys as needed
188
- - Monitor for security updates
189
- - Review access logs regularly
190
-
191
- ## 📊 Architecture Overview
192
-
193
- ```
194
- ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
195
- │ Frontend │ │ Backend │ │ External │
196
- │ (React/Vite) │────│ (FastAPI) │────│ Services │
197
- │ │ │ │ │ │
198
- │ • User Interface│ │ • API Endpoints │ │ • Gemini API │
199
- │ • Document UI │ │ • Auth System │ │ • Qdrant Cloud │
200
- │ • Chat Interface│ │ • File Processing│ │ • PostgreSQL │
201
- └─────────────────┘ └─────────────────┘ └─────────────────┘
202
-
203
- ┌─────────────────┐
204
- │ Database │
205
- │ (SQLite/PG) │
206
- │ │
207
- │ • User Data │
208
- │ • Documents │
209
- │ • Metadata │
210
- └─────────────────┘
211
- ```
212
-
213
- ## 🎯 Best Practices Summary
214
-
215
- ### Deployment
216
- - Use external services for free tier deployments
217
- - Implement proper health checks
218
- - Configure auto-scaling appropriately
219
- - Use platform-specific optimizations
220
-
221
- ### Security
222
- - Never commit secrets to version control
223
- - Use strong JWT secrets (32+ characters)
224
- - Restrict CORS to specific domains
225
- - Implement proper authentication
226
-
227
- ### Performance
228
- - Use caching where appropriate
229
- - Optimize Docker images for size
230
- - Monitor resource usage regularly
231
- - Implement graceful degradation
232
-
233
- ### Maintenance
234
- - Automate backups and health checks
235
- - Monitor logs and metrics
236
- - Keep dependencies updated
237
- - Document configuration changes
238
-
239
- This documentation index provides a comprehensive overview of all available resources for successfully deploying and maintaining the Knowledge Assistant RAG application across multiple platforms.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile.railway ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Railway-optimized Dockerfile for Knowledge Assistant RAG
2
+ FROM python:3.11-slim
3
+
4
+ # Install system dependencies
5
+ RUN apt-get update && apt-get install -y \
6
+ curl \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ # Set the working directory
10
+ WORKDIR /app
11
+
12
+ # Copy requirements and install dependencies
13
+ COPY requirements-railway.txt .
14
+ RUN pip install --no-cache-dir -r requirements-railway.txt
15
+
16
+ # Copy the application code
17
+ COPY ./src /app/src
18
+ COPY ./scripts /app/scripts
19
+ COPY ./alembic /app/alembic
20
+ COPY ./alembic.ini /app/alembic.ini
21
+ COPY ./create-test-user.py /app/create-test-user.py
22
+
23
+ # Create data directory for SQLite database
24
+ RUN mkdir -p /app/data
25
+
26
+ # Make scripts executable
27
+ RUN chmod +x /app/scripts/*.sh
28
+
29
+ # Use Railway's PORT environment variable
30
+ ENV PORT=8000
31
+ EXPOSE $PORT
32
+
33
+ # Add health check
34
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
35
+ CMD curl -f http://localhost:$PORT/health || exit 1
36
+
37
+ # Define the command to run the application
38
+ CMD ["/app/scripts/init-db-railway.sh"]
Dockerfile.simple ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install basic dependencies
6
+ RUN pip install fastapi uvicorn
7
+
8
+ # Copy the simple main file
9
+ COPY main-simple.py /app/main.py
10
+
11
+ # Expose port (Railway uses PORT env var)
12
+ EXPOSE ${PORT:-8000}
13
+
14
+ # Run the application with dynamic port
15
+ CMD uvicorn main:app --host 0.0.0.0 --port ${PORT:-8000}
ENVIRONMENT_CONFIGURATION.md DELETED
@@ -1,882 +0,0 @@
1
- # Environment Variables and Secrets Configuration Guide
2
-
3
- This guide provides comprehensive documentation for configuring environment variables and managing secrets across all deployment platforms for the Knowledge Assistant RAG application.
4
-
5
- ## Table of Contents
6
-
7
- 1. [Core Environment Variables](#core-environment-variables)
8
- 2. [Platform-Specific Configuration](#platform-specific-configuration)
9
- 3. [Secrets Management](#secrets-management)
10
- 4. [Environment Templates](#environment-templates)
11
- 5. [Validation and Testing](#validation-and-testing)
12
- 6. [Security Best Practices](#security-best-practices)
13
- 7. [Troubleshooting](#troubleshooting)
14
-
15
- ## Core Environment Variables
16
-
17
- ### Required Variables
18
-
19
- #### Authentication & Security
20
- ```bash
21
- # JWT Secret Key (REQUIRED)
22
- # Must be at least 32 characters long
23
- # Generate with: openssl rand -base64 32
24
- JWT_SECRET=your-super-secure-jwt-secret-key-32-chars-minimum
25
-
26
- # User Registration Control
27
- USER_REGISTRATION_ENABLED=true # or false to disable new registrations
28
- ```
29
-
30
- #### Database Configuration
31
- ```bash
32
- # SQLite (Default)
33
- DATABASE_URL=sqlite+aiosqlite:///./data/knowledge_assistant.db
34
-
35
- # PostgreSQL (Production)
36
- DATABASE_URL=postgresql://username:password@host:port/database_name
37
-
38
- # PostgreSQL with SSL (Cloud deployments)
39
- DATABASE_URL=postgresql://username:password@host:port/database_name?sslmode=require
40
- ```
41
-
42
- #### Vector Database (Qdrant)
43
- ```bash
44
- # Self-hosted Qdrant
45
- QDRANT_HOST=localhost
46
- QDRANT_PORT=6333
47
-
48
- # Qdrant Cloud
49
- QDRANT_CLOUD_URL=https://your-cluster-id.qdrant.io
50
- QDRANT_API_KEY=your-qdrant-cloud-api-key
51
- ```
52
-
53
- #### LLM Service Configuration
54
- ```bash
55
- # Google Gemini API (Recommended)
56
- GEMINI_API_KEY=your-google-gemini-api-key
57
-
58
- # OpenAI API (Alternative)
59
- OPENAI_API_KEY=your-openai-api-key
60
- USE_OPENAI_INSTEAD_OF_GEMINI=false # Set to true to use OpenAI
61
- ```
62
-
63
- #### CORS Configuration
64
- ```bash
65
- # Frontend Origins (comma-separated)
66
- CORS_ORIGINS=https://your-frontend-domain.com,http://localhost:3000
67
-
68
- # For development
69
- CORS_ORIGINS=http://localhost:3000,http://127.0.0.1:3000
70
- ```
71
-
72
- ### Optional Variables
73
-
74
- #### Application Configuration
75
- ```bash
76
- # Server Configuration
77
- PORT=8000
78
- HOST=0.0.0.0
79
- WORKERS=1
80
-
81
- # Logging
82
- LOG_LEVEL=INFO # DEBUG, INFO, WARNING, ERROR, CRITICAL
83
- LOG_FORMAT=json # json or text
84
-
85
- # File Upload Limits
86
- MAX_FILE_SIZE=10485760 # 10MB in bytes
87
- ALLOWED_FILE_TYPES=pdf,txt,docx,md
88
-
89
- # Query Configuration
90
- MAX_QUERY_LENGTH=1000
91
- DEFAULT_SEARCH_LIMIT=10
92
- ```
93
-
94
- #### Performance Tuning
95
- ```bash
96
- # Database Connection Pool
97
- DB_POOL_SIZE=5
98
- DB_MAX_OVERFLOW=10
99
- DB_POOL_TIMEOUT=30
100
-
101
- # Vector Search Configuration
102
- VECTOR_SEARCH_TOP_K=5
103
- EMBEDDING_BATCH_SIZE=100
104
-
105
- # API Timeouts
106
- API_TIMEOUT=30
107
- GEMINI_TIMEOUT=30
108
- QDRANT_TIMEOUT=10
109
- ```
110
-
111
- ### Frontend Environment Variables
112
-
113
- #### React/Vite Configuration
114
- ```bash
115
- # API Configuration
116
- VITE_API_BASE_URL=https://your-backend-domain.com
117
- VITE_API_TIMEOUT=30000
118
-
119
- # Feature Flags
120
- VITE_ENABLE_REGISTRATION=true
121
- VITE_ENABLE_FILE_UPLOAD=true
122
- VITE_ENABLE_DARK_MODE=true
123
-
124
- # Analytics (Optional)
125
- VITE_GOOGLE_ANALYTICS_ID=GA_MEASUREMENT_ID
126
- VITE_SENTRY_DSN=your-sentry-dsn
127
- ```
128
-
129
- ## Platform-Specific Configuration
130
-
131
- ### Railway Configuration
132
-
133
- #### Environment File: `.env.railway`
134
- ```bash
135
- # Railway-specific variables
136
- RAILWAY_ENVIRONMENT=production
137
- PORT=8000
138
-
139
- # Database (Railway PostgreSQL)
140
- DATABASE_URL=$DATABASE_URL # Automatically provided by Railway
141
-
142
- # External Services (Recommended for free tier)
143
- QDRANT_CLOUD_URL=https://your-cluster.qdrant.io
144
- QDRANT_API_KEY=your-qdrant-api-key
145
- GEMINI_API_KEY=your-gemini-api-key
146
-
147
- # Security
148
- JWT_SECRET=your-jwt-secret-32-chars-minimum
149
-
150
- # CORS
151
- CORS_ORIGINS=https://your-app.railway.app
152
-
153
- # Frontend
154
- VITE_API_BASE_URL=https://your-backend.railway.app
155
- ```
156
-
157
- #### Setting Variables via CLI
158
- ```bash
159
- # Login to Railway
160
- railway login
161
-
162
- # Set environment variables
163
- railway variables set JWT_SECRET="$(openssl rand -base64 32)"
164
- railway variables set GEMINI_API_KEY="your-gemini-api-key"
165
- railway variables set USER_REGISTRATION_ENABLED="true"
166
- railway variables set CORS_ORIGINS="https://your-frontend.railway.app"
167
-
168
- # Frontend variables
169
- cd rag-quest-hub
170
- railway variables set VITE_API_BASE_URL="https://your-backend.railway.app"
171
- railway variables set VITE_ENABLE_REGISTRATION="true"
172
- ```
173
-
174
- ### Fly.io Configuration
175
-
176
- #### Environment File: `.env.fly`
177
- ```bash
178
- # Fly.io specific
179
- FLY_APP_NAME=knowledge-assistant-rag
180
- FLY_REGION=ord
181
-
182
- # Database
183
- DATABASE_URL=sqlite+aiosqlite:///./data/knowledge_assistant.db
184
-
185
- # Services
186
- QDRANT_HOST=localhost
187
- QDRANT_PORT=6333
188
-
189
- # External APIs
190
- GEMINI_API_KEY=your-gemini-api-key
191
-
192
- # Security
193
- JWT_SECRET=your-jwt-secret
194
-
195
- # CORS
196
- CORS_ORIGINS=https://your-app.fly.dev
197
- ```
198
-
199
- #### Setting Secrets via CLI
200
- ```bash
201
- # Set secrets
202
- flyctl secrets set JWT_SECRET="$(openssl rand -base64 32)"
203
- flyctl secrets set GEMINI_API_KEY="your-gemini-api-key"
204
-
205
- # Set regular environment variables in fly.toml
206
- [env]
207
- USER_REGISTRATION_ENABLED = "true"
208
- CORS_ORIGINS = "https://your-app.fly.dev"
209
- DATABASE_URL = "sqlite+aiosqlite:///./data/knowledge_assistant.db"
210
- ```
211
-
212
- ### Google Cloud Run Configuration
213
-
214
- #### Environment File: `.env.cloudrun`
215
- ```bash
216
- # Google Cloud Project
217
- PROJECT_ID=your-gcp-project-id
218
- REGION=us-central1
219
-
220
- # Database (Cloud SQL)
221
- DATABASE_URL=postgresql://user:pass@/db?host=/cloudsql/project:region:instance
222
-
223
- # Services
224
- QDRANT_HOST=knowledge-assistant-qdrant-hash-uc.a.run.app
225
- QDRANT_PORT=443
226
-
227
- # External APIs
228
- GEMINI_API_KEY=your-gemini-api-key
229
-
230
- # Security (stored in Secret Manager)
231
- JWT_SECRET=projects/PROJECT_ID/secrets/jwt-secret/versions/latest
232
-
233
- # CORS
234
- CORS_ORIGINS=https://knowledge-assistant-frontend-hash-uc.a.run.app
235
- ```
236
-
237
- #### Setting Variables via CLI
238
- ```bash
239
- # Create secrets in Secret Manager
240
- echo -n "$(openssl rand -base64 32)" | gcloud secrets create jwt-secret --data-file=-
241
- echo -n "your-gemini-api-key" | gcloud secrets create gemini-api-key --data-file=-
242
-
243
- # Update Cloud Run service with environment variables
244
- gcloud run services update knowledge-assistant-backend \
245
- --region=us-central1 \
246
- --set-env-vars="USER_REGISTRATION_ENABLED=true" \
247
- --set-env-vars="CORS_ORIGINS=https://your-frontend-url.com"
248
-
249
- # Update with secrets
250
- gcloud run services update knowledge-assistant-backend \
251
- --region=us-central1 \
252
- --set-secrets="JWT_SECRET=jwt-secret:latest" \
253
- --set-secrets="GEMINI_API_KEY=gemini-api-key:latest"
254
- ```
255
-
256
- ### Vercel Configuration
257
-
258
- #### Environment File: `.env.vercel`
259
- ```bash
260
- # Vercel-specific
261
- VERCEL_ENV=production
262
-
263
- # External Services (All external for serverless)
264
- DATABASE_URL=postgresql://user:pass@host:port/db
265
- QDRANT_CLOUD_URL=https://your-cluster.qdrant.io
266
- QDRANT_API_KEY=your-qdrant-api-key
267
- GEMINI_API_KEY=your-gemini-api-key
268
-
269
- # Security
270
- JWT_SECRET=your-jwt-secret
271
-
272
- # CORS
273
- CORS_ORIGINS=https://your-app.vercel.app
274
-
275
- # Frontend
276
- VITE_API_BASE_URL=https://your-app.vercel.app/api
277
- ```
278
-
279
- #### Setting Variables via CLI
280
- ```bash
281
- # Set environment variables
282
- vercel env add JWT_SECRET production
283
- vercel env add GEMINI_API_KEY production
284
- vercel env add DATABASE_URL production
285
- vercel env add QDRANT_CLOUD_URL production
286
- vercel env add QDRANT_API_KEY production
287
-
288
- # Frontend variables
289
- vercel env add VITE_API_BASE_URL production
290
- vercel env add VITE_ENABLE_REGISTRATION production
291
- ```
292
-
293
- ## Secrets Management
294
-
295
- ### Secret Generation
296
-
297
- #### JWT Secret Generation
298
- ```bash
299
- # Method 1: OpenSSL
300
- openssl rand -base64 32
301
-
302
- # Method 2: Python
303
- python -c "import secrets; print(secrets.token_urlsafe(32))"
304
-
305
- # Method 3: Node.js
306
- node -e "console.log(require('crypto').randomBytes(32).toString('base64'))"
307
-
308
- # Validation: Ensure at least 32 characters
309
- echo -n "your-jwt-secret" | wc -c
310
- ```
311
-
312
- #### API Key Management
313
- ```bash
314
- # Google Gemini API Key
315
- # 1. Visit https://makersuite.google.com/app/apikey
316
- # 2. Create new API key
317
- # 3. Copy and store securely
318
-
319
- # Qdrant Cloud API Key
320
- # 1. Visit https://cloud.qdrant.io
321
- # 2. Create cluster
322
- # 3. Generate API key from dashboard
323
- ```
324
-
325
- ### Platform-Specific Secret Storage
326
-
327
- #### Railway Secrets
328
- ```bash
329
- # Set via CLI
330
- railway variables set SECRET_NAME="secret_value"
331
-
332
- # Set via web dashboard
333
- # 1. Visit railway.app
334
- # 2. Select your project
335
- # 3. Go to Variables tab
336
- # 4. Add environment variable
337
- ```
338
-
339
- #### Fly.io Secrets
340
- ```bash
341
- # Set secrets (encrypted at rest)
342
- flyctl secrets set SECRET_NAME="secret_value"
343
-
344
- # List secrets (values hidden)
345
- flyctl secrets list
346
-
347
- # Remove secrets
348
- flyctl secrets unset SECRET_NAME
349
- ```
350
-
351
- #### Google Cloud Secret Manager
352
- ```bash
353
- # Create secret
354
- echo -n "secret_value" | gcloud secrets create secret-name --data-file=-
355
-
356
- # Grant access to service account
357
- gcloud secrets add-iam-policy-binding secret-name \
358
- --member="serviceAccount:service-account@project.iam.gserviceaccount.com" \
359
- --role="roles/secretmanager.secretAccessor"
360
-
361
- # Use in Cloud Run
362
- gcloud run services update service-name \
363
- --set-secrets="ENV_VAR=secret-name:latest"
364
- ```
365
-
366
- #### Vercel Environment Variables
367
- ```bash
368
- # Set via CLI
369
- vercel env add SECRET_NAME
370
-
371
- # Set via web dashboard
372
- # 1. Visit vercel.com
373
- # 2. Select your project
374
- # 3. Go to Settings > Environment Variables
375
- # 4. Add variable with appropriate environment
376
- ```
377
-
378
- ## Environment Templates
379
-
380
- ### Development Template (`.env.development`)
381
- ```bash
382
- # Development Configuration
383
- NODE_ENV=development
384
- DEBUG=true
385
- LOG_LEVEL=DEBUG
386
-
387
- # Database
388
- DATABASE_URL=sqlite+aiosqlite:///./data/knowledge_assistant_dev.db
389
-
390
- # Services (Local)
391
- QDRANT_HOST=localhost
392
- QDRANT_PORT=6333
393
-
394
- # External APIs
395
- GEMINI_API_KEY=your-dev-gemini-api-key
396
-
397
- # Security (Use different secret for dev)
398
- JWT_SECRET=development-jwt-secret-32-chars-minimum
399
-
400
- # CORS (Allow local development)
401
- CORS_ORIGINS=http://localhost:3000,http://127.0.0.1:3000
402
-
403
- # Frontend
404
- VITE_API_BASE_URL=http://localhost:8000
405
- VITE_ENABLE_REGISTRATION=true
406
- ```
407
-
408
- ### Production Template (`.env.production`)
409
- ```bash
410
- # Production Configuration
411
- NODE_ENV=production
412
- DEBUG=false
413
- LOG_LEVEL=INFO
414
-
415
- # Database (Use PostgreSQL in production)
416
- DATABASE_URL=postgresql://user:password@host:port/database
417
-
418
- # Services
419
- QDRANT_CLOUD_URL=https://your-cluster.qdrant.io
420
- QDRANT_API_KEY=your-production-qdrant-api-key
421
-
422
- # External APIs
423
- GEMINI_API_KEY=your-production-gemini-api-key
424
-
425
- # Security
426
- JWT_SECRET=production-jwt-secret-32-chars-minimum
427
-
428
- # CORS (Restrict to your domain)
429
- CORS_ORIGINS=https://your-production-domain.com
430
-
431
- # Frontend
432
- VITE_API_BASE_URL=https://your-production-api-domain.com
433
- VITE_ENABLE_REGISTRATION=false # Disable registration in production
434
- ```
435
-
436
- ### Testing Template (`.env.test`)
437
- ```bash
438
- # Test Configuration
439
- NODE_ENV=test
440
- DEBUG=false
441
- LOG_LEVEL=WARNING
442
-
443
- # Database (In-memory for tests)
444
- DATABASE_URL=sqlite+aiosqlite:///:memory:
445
-
446
- # Services (Mock or local)
447
- QDRANT_HOST=localhost
448
- QDRANT_PORT=6333
449
-
450
- # External APIs (Use test keys or mocks)
451
- GEMINI_API_KEY=test-gemini-api-key
452
-
453
- # Security
454
- JWT_SECRET=test-jwt-secret-32-chars-minimum
455
-
456
- # CORS
457
- CORS_ORIGINS=http://localhost:3000
458
-
459
- # Frontend
460
- VITE_API_BASE_URL=http://localhost:8000
461
- VITE_ENABLE_REGISTRATION=true
462
- ```
463
-
464
- ## Validation and Testing
465
-
466
- ### Environment Validation Script
467
-
468
- Create `scripts/validate-environment.sh`:
469
- ```bash
470
- #!/bin/bash
471
-
472
- # Colors for output
473
- RED='\033[0;31m'
474
- GREEN='\033[0;32m'
475
- YELLOW='\033[1;33m'
476
- NC='\033[0m' # No Color
477
-
478
- # Validation functions
479
- validate_required_var() {
480
- local var_name=$1
481
- local var_value=${!var_name}
482
-
483
- if [[ -z "$var_value" ]]; then
484
- echo -e "${RED}❌ $var_name is not set${NC}"
485
- return 1
486
- else
487
- echo -e "${GREEN}✅ $var_name is set${NC}"
488
- return 0
489
- fi
490
- }
491
-
492
- validate_jwt_secret() {
493
- if [[ ${#JWT_SECRET} -lt 32 ]]; then
494
- echo -e "${RED}❌ JWT_SECRET must be at least 32 characters (current: ${#JWT_SECRET})${NC}"
495
- return 1
496
- else
497
- echo -e "${GREEN}✅ JWT_SECRET length is valid (${#JWT_SECRET} characters)${NC}"
498
- return 0
499
- fi
500
- }
501
-
502
- validate_database_url() {
503
- if [[ "$DATABASE_URL" =~ ^(sqlite|postgresql)(\+[a-z0-9]+)?:// ]]; then
504
- echo -e "${GREEN}✅ DATABASE_URL format is valid${NC}"
505
- return 0
506
- else
507
- echo -e "${RED}❌ DATABASE_URL format is invalid${NC}"
508
- return 1
509
- fi
510
- }
511
-
512
- validate_cors_origins() {
513
- if [[ "$CORS_ORIGINS" =~ ^https?:// ]]; then
514
- echo -e "${GREEN}✅ CORS_ORIGINS format is valid${NC}"
515
- return 0
516
- else
517
- echo -e "${YELLOW}⚠️ CORS_ORIGINS should start with http:// or https://${NC}"
518
- return 0
519
- fi
520
- }
521
-
522
- # Main validation
523
- echo "🔍 Validating environment variables..."
524
- echo
525
-
526
- # Required variables
527
- required_vars=(
528
- "JWT_SECRET"
529
- "DATABASE_URL"
530
- "GEMINI_API_KEY"
531
- )
532
-
533
- validation_failed=false
534
-
535
- for var in "${required_vars[@]}"; do
536
- if ! validate_required_var "$var"; then
537
- validation_failed=true
538
- fi
539
- done
540
-
541
- # Specific validations
542
- if [[ -n "$JWT_SECRET" ]]; then
543
- if ! validate_jwt_secret; then
544
- validation_failed=true
545
- fi
546
- fi
547
-
548
- if [[ -n "$DATABASE_URL" ]]; then
549
- validate_database_url
550
- fi
551
-
552
- if [[ -n "$CORS_ORIGINS" ]]; then
553
- validate_cors_origins
554
- fi
555
-
556
- # Optional variables check
557
- optional_vars=(
558
- "QDRANT_HOST"
559
- "QDRANT_PORT"
560
- "QDRANT_CLOUD_URL"
561
- "QDRANT_API_KEY"
562
- "USER_REGISTRATION_ENABLED"
563
- "CORS_ORIGINS"
564
- )
565
-
566
- echo
567
- echo "📋 Optional variables status:"
568
- for var in "${optional_vars[@]}"; do
569
- if [[ -n "${!var}" ]]; then
570
- echo -e "${GREEN}✅ $var is set${NC}"
571
- else
572
- echo -e "${YELLOW}⚠️ $var is not set${NC}"
573
- fi
574
- done
575
-
576
- echo
577
- if [[ "$validation_failed" == true ]]; then
578
- echo -e "${RED}❌ Environment validation failed${NC}"
579
- exit 1
580
- else
581
- echo -e "${GREEN}✅ Environment validation passed${NC}"
582
- exit 0
583
- fi
584
- ```
585
-
586
- ### Testing Environment Variables
587
-
588
- Create `scripts/test-environment.sh`:
589
- ```bash
590
- #!/bin/bash
591
-
592
- # Test database connection
593
- test_database() {
594
- echo "Testing database connection..."
595
- python -c "
596
- import asyncio
597
- from src.core.database import get_database
598
- async def test():
599
- try:
600
- db = get_database()
601
- print('✅ Database connection successful')
602
- return True
603
- except Exception as e:
604
- print(f'❌ Database connection failed: {e}')
605
- return False
606
- asyncio.run(test())
607
- "
608
- }
609
-
610
- # Test Qdrant connection
611
- test_qdrant() {
612
- echo "Testing Qdrant connection..."
613
- if [[ -n "$QDRANT_CLOUD_URL" ]]; then
614
- curl -f -s "$QDRANT_CLOUD_URL/health" > /dev/null
615
- else
616
- curl -f -s "http://${QDRANT_HOST:-localhost}:${QDRANT_PORT:-6333}/health" > /dev/null
617
- fi
618
-
619
- if [[ $? -eq 0 ]]; then
620
- echo "✅ Qdrant connection successful"
621
- else
622
- echo "❌ Qdrant connection failed"
623
- fi
624
- }
625
-
626
- # Test Gemini API
627
- test_gemini() {
628
- echo "Testing Gemini API..."
629
- python -c "
630
- import os
631
- import requests
632
- api_key = os.getenv('GEMINI_API_KEY')
633
- if not api_key:
634
- print('❌ GEMINI_API_KEY not set')
635
- exit(1)
636
-
637
- try:
638
- # Simple API test
639
- url = f'https://generativelanguage.googleapis.com/v1/models?key={api_key}'
640
- response = requests.get(url, timeout=10)
641
- if response.status_code == 200:
642
- print('✅ Gemini API connection successful')
643
- else:
644
- print(f'❌ Gemini API connection failed: {response.status_code}')
645
- except Exception as e:
646
- print(f'❌ Gemini API connection failed: {e}')
647
- "
648
- }
649
-
650
- # Run all tests
651
- echo "🧪 Testing environment configuration..."
652
- echo
653
-
654
- test_database
655
- test_qdrant
656
- test_gemini
657
-
658
- echo
659
- echo "✅ Environment testing complete"
660
- ```
661
-
662
- ## Security Best Practices
663
-
664
- ### Secret Management Best Practices
665
-
666
- 1. **Never commit secrets to version control**
667
- ```bash
668
- # Add to .gitignore
669
- echo ".env*" >> .gitignore
670
- echo "!.env.example" >> .gitignore
671
- ```
672
-
673
- 2. **Use different secrets for different environments**
674
- ```bash
675
- # Development
676
- JWT_SECRET=dev-secret-32-chars-minimum
677
-
678
- # Production
679
- JWT_SECRET=prod-secret-different-32-chars-minimum
680
- ```
681
-
682
- 3. **Rotate secrets regularly**
683
- ```bash
684
- # Generate new JWT secret
685
- NEW_SECRET=$(openssl rand -base64 32)
686
-
687
- # Update in platform
688
- railway variables set JWT_SECRET="$NEW_SECRET"
689
- ```
690
-
691
- 4. **Use platform-specific secret management**
692
- - Railway: Environment variables (encrypted)
693
- - Fly.io: Secrets (encrypted at rest)
694
- - Google Cloud: Secret Manager
695
- - Vercel: Environment variables (encrypted)
696
-
697
- ### Environment Variable Security
698
-
699
- 1. **Validate environment variables on startup**
700
- ```python
701
- import os
702
- import sys
703
-
704
- def validate_environment():
705
- required_vars = ['JWT_SECRET', 'DATABASE_URL', 'GEMINI_API_KEY']
706
- missing_vars = [var for var in required_vars if not os.getenv(var)]
707
-
708
- if missing_vars:
709
- print(f"Missing required environment variables: {missing_vars}")
710
- sys.exit(1)
711
-
712
- validate_environment()
713
- ```
714
-
715
- 2. **Use secure defaults**
716
- ```python
717
- # Secure defaults
718
- USER_REGISTRATION_ENABLED = os.getenv('USER_REGISTRATION_ENABLED', 'false').lower() == 'true'
719
- DEBUG = os.getenv('DEBUG', 'false').lower() == 'true'
720
- LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
721
- ```
722
-
723
- 3. **Sanitize environment variables in logs**
724
- ```python
725
- import re
726
-
727
- def sanitize_env_for_logging(env_dict):
728
- sensitive_patterns = [
729
- r'.*SECRET.*',
730
- r'.*PASSWORD.*',
731
- r'.*KEY.*',
732
- r'.*TOKEN.*'
733
- ]
734
-
735
- sanitized = {}
736
- for key, value in env_dict.items():
737
- if any(re.match(pattern, key, re.IGNORECASE) for pattern in sensitive_patterns):
738
- sanitized[key] = '***'
739
- else:
740
- sanitized[key] = value
741
-
742
- return sanitized
743
- ```
744
-
745
- ## Troubleshooting
746
-
747
- ### Common Issues
748
-
749
- #### 1. JWT Secret Too Short
750
- ```bash
751
- # Error: JWT secret must be at least 32 characters
752
- # Solution: Generate proper secret
753
- openssl rand -base64 32
754
- ```
755
-
756
- #### 2. Database Connection Failed
757
- ```bash
758
- # Check DATABASE_URL format
759
- echo $DATABASE_URL
760
-
761
- # For SQLite, ensure directory exists
762
- mkdir -p data/
763
-
764
- # For PostgreSQL, test connection
765
- psql "$DATABASE_URL" -c "SELECT 1;"
766
- ```
767
-
768
- #### 3. CORS Issues
769
- ```bash
770
- # Check CORS_ORIGINS format
771
- echo $CORS_ORIGINS
772
-
773
- # Should be: https://domain.com,https://other-domain.com
774
- # Not: https://domain.com, https://other-domain.com (no spaces)
775
- ```
776
-
777
- #### 4. API Key Invalid
778
- ```bash
779
- # Test Gemini API key
780
- curl -H "x-goog-api-key: $GEMINI_API_KEY" \
781
- "https://generativelanguage.googleapis.com/v1/models"
782
- ```
783
-
784
- ### Environment Variable Debugging
785
-
786
- Create `scripts/debug-environment.sh`:
787
- ```bash
788
- #!/bin/bash
789
-
790
- echo "🔍 Environment Variable Debug Information"
791
- echo "========================================"
792
- echo
793
-
794
- echo "📊 System Information:"
795
- echo "OS: $(uname -s)"
796
- echo "Shell: $SHELL"
797
- echo "User: $USER"
798
- echo "PWD: $PWD"
799
- echo
800
-
801
- echo "🔐 Security Variables (sanitized):"
802
- echo "JWT_SECRET: ${JWT_SECRET:0:8}... (${#JWT_SECRET} chars)"
803
- echo "GEMINI_API_KEY: ${GEMINI_API_KEY:0:8}... (${#GEMINI_API_KEY} chars)"
804
- echo
805
-
806
- echo "🗄️ Database Configuration:"
807
- echo "DATABASE_URL: $(printf '%s' "${DATABASE_URL}" | sed -E 's#(://[^:/@]+):[^@]*@#\1:***@#')"
808
- echo
809
-
810
- echo "🔍 Vector Database Configuration:"
811
- echo "QDRANT_HOST: ${QDRANT_HOST:-not set}"
812
- echo "QDRANT_PORT: ${QDRANT_PORT:-not set}"
813
- echo "QDRANT_CLOUD_URL: ${QDRANT_CLOUD_URL:-not set}"
814
- echo "QDRANT_API_KEY: ${QDRANT_API_KEY:0:8}... (${#QDRANT_API_KEY} chars)"
815
- echo
816
-
817
- echo "🌐 CORS Configuration:"
818
- echo "CORS_ORIGINS: ${CORS_ORIGINS:-not set}"
819
- echo
820
-
821
- echo "⚙️ Application Configuration:"
822
- echo "USER_REGISTRATION_ENABLED: ${USER_REGISTRATION_ENABLED:-not set}"
823
- echo "LOG_LEVEL: ${LOG_LEVEL:-not set}"
824
- echo "DEBUG: ${DEBUG:-not set}"
825
- echo
826
-
827
- echo "🎨 Frontend Configuration:"
828
- echo "VITE_API_BASE_URL: ${VITE_API_BASE_URL:-not set}"
829
- echo "VITE_ENABLE_REGISTRATION: ${VITE_ENABLE_REGISTRATION:-not set}"
830
- ```
831
-
832
- ### Platform-Specific Debugging
833
-
834
- #### Railway
835
- ```bash
836
- # Check current variables
837
- railway variables
838
-
839
- # Check service logs
840
- railway logs
841
-
842
- # Check service status
843
- railway status
844
- ```
845
-
846
- #### Fly.io
847
- ```bash
848
- # Check secrets
849
- flyctl secrets list
850
-
851
- # Check environment variables
852
- flyctl config show
853
-
854
- # Check app status
855
- flyctl status
856
- ```
857
-
858
- #### Google Cloud Run
859
- ```bash
860
- # Check service configuration
861
- gcloud run services describe SERVICE_NAME --region=REGION
862
-
863
- # Check secrets
864
- gcloud secrets list
865
-
866
- # Check logs
867
- gcloud logging read "resource.type=\"cloud_run_revision\""
868
- ```
869
-
870
- #### Vercel
871
- ```bash
872
- # Check environment variables
873
- vercel env ls
874
-
875
- # Check deployment logs
876
- vercel logs
877
-
878
- # Check project settings
879
- vercel project ls
880
- ```
881
-
882
- This comprehensive guide should help you properly configure and manage environment variables and secrets across all deployment platforms.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FAQ.md DELETED
@@ -1,747 +0,0 @@
1
- # Frequently Asked Questions (FAQ)
2
-
3
- This document addresses common questions about deploying, configuring, and maintaining the Knowledge Assistant RAG application across different platforms.
4
-
5
- ## Table of Contents
6
-
7
- 1. [General Questions](#general-questions)
8
- 2. [Deployment Questions](#deployment-questions)
9
- 3. [Configuration Questions](#configuration-questions)
10
- 4. [Performance Questions](#performance-questions)
11
- 5. [Troubleshooting Questions](#troubleshooting-questions)
12
- 6. [Security Questions](#security-questions)
13
- 7. [Cost and Scaling Questions](#cost-and-scaling-questions)
14
-
15
- ## General Questions
16
-
17
- ### Q: What is the Knowledge Assistant RAG application?
18
-
19
- **A:** The Knowledge Assistant RAG (Retrieval-Augmented Generation) application is a document-based question-answering system that allows users to upload documents, process them into vector embeddings, and query them using natural language. It combines document retrieval with large language model generation to provide accurate, context-aware responses.
20
-
21
- **Key Features:**
22
- - Document upload and processing (PDF, TXT, DOCX, MD)
23
- - Vector-based semantic search using Qdrant
24
- - AI-powered responses using Google Gemini API
25
- - User authentication and document management
26
- - RESTful API with React frontend
27
-
28
- ### Q: What are the system requirements?
29
-
30
- **A:**
31
- **Minimum Requirements:**
32
- - 512MB RAM (with external services)
33
- - 1GB storage
34
- - 1 CPU core
35
- - Internet connection for API services
36
-
37
- **Recommended Requirements:**
38
- - 1GB RAM
39
- - 5GB storage
40
- - 2 CPU cores
41
- - Stable internet connection
42
-
43
- **Development Requirements:**
44
- - Docker and Docker Compose
45
- - Node.js 18+ (for frontend development)
46
- - Python 3.11+ (for backend development)
47
-
48
- ### Q: Which deployment platforms are supported?
49
-
50
- **A:** The application supports multiple deployment platforms:
51
-
52
- 1. **Railway** - Free tier: 512MB RAM, 1GB storage
53
- 2. **Fly.io** - Free tier: 256MB RAM, 1GB storage
54
- 3. **Google Cloud Run** - Free tier: 1GB memory, 2 vCPU
55
- 4. **Vercel** - Hybrid deployment with serverless functions
56
- 5. **Local Docker** - For development and self-hosting
57
-
58
- Each platform has specific optimizations and configurations documented in their respective deployment guides.
59
-
60
- ### Q: What external services are required?
61
-
62
- **A:**
63
- **Required:**
64
- - Google Gemini API (for LLM responses)
65
-
66
- **Optional (but recommended for production):**
67
- - Qdrant Cloud (vector database)
68
- - PostgreSQL (database, instead of SQLite)
69
- - Redis (caching)
70
-
71
- **Free Tier Alternatives:**
72
- - Use SQLite for database (included)
73
- - Self-host Qdrant (included in Docker setup)
74
- - Use in-memory caching instead of Redis
75
-
76
- ## Deployment Questions
77
-
78
- ### Q: How do I choose the best deployment platform?
79
-
80
- **A:** Choose based on your needs:
81
-
82
- **Railway** - Best for beginners
83
- - ✅ Easy setup and deployment
84
- - ✅ Built-in PostgreSQL
85
- - ✅ Good free tier (512MB RAM)
86
- - ❌ Limited to single region
87
-
88
- **Fly.io** - Best for global deployment
89
- - ✅ Multi-region deployment
90
- - ✅ Excellent Docker support
91
- - ✅ Good performance
92
- - ❌ Smaller free tier (256MB RAM)
93
-
94
- **Google Cloud Run** - Best for enterprise
95
- - ✅ Largest free tier (1GB RAM)
96
- - ✅ Excellent scaling
97
- - ✅ Integration with Google services
98
- - ❌ More complex setup
99
-
100
- **Vercel** - Best for frontend-heavy applications
101
- - ✅ Excellent frontend performance
102
- - ✅ Global CDN
103
- - ✅ Serverless functions
104
- - ❌ Backend limitations
105
-
106
- ### Q: Can I deploy without using external APIs?
107
-
108
- **A:** Partially. You can run the application locally with self-hosted services, but you'll need at least one of these for LLM functionality:
109
-
110
- **Options:**
111
- 1. **Google Gemini API** (recommended, free tier available)
112
- 2. **OpenAI API** (paid service)
113
- 3. **Self-hosted Ollama** (requires significant resources, 2GB+ RAM)
114
-
115
- **Note:** The free deployment guides focus on using external APIs to stay within platform resource limits.
116
-
117
- ### Q: How long does deployment take?
118
-
119
- **A:** Deployment times vary by platform:
120
-
121
- - **Railway**: 5-10 minutes (automated)
122
- - **Fly.io**: 10-15 minutes (includes volume creation)
123
- - **Google Cloud Run**: 15-20 minutes (includes infrastructure setup)
124
- - **Vercel**: 5-10 minutes (frontend-focused)
125
- - **Local Docker**: 2-5 minutes (after initial image builds)
126
-
127
- **First-time setup** may take longer due to:
128
- - API key generation
129
- - Platform account setup
130
- - Initial image builds
131
-
132
- ### Q: What happens if deployment fails?
133
-
134
- **A:** Common failure points and solutions:
135
-
136
- 1. **Build Failures**
137
- - Check Docker image compatibility
138
- - Verify all dependencies are available
139
- - Review build logs for specific errors
140
-
141
- 2. **Resource Limits**
142
- - Use external services (Qdrant Cloud, Gemini API)
143
- - Optimize Docker images
144
- - Consider upgrading to paid tier
145
-
146
- 3. **Configuration Errors**
147
- - Validate environment variables
148
- - Check API key permissions
149
- - Verify service connectivity
150
-
151
- **Recovery Steps:**
152
- ```bash
153
- # Check deployment logs
154
- railway logs # or flyctl logs, gcloud logging read, etc.
155
-
156
- # Rollback to previous version
157
- railway rollback # or flyctl releases rollback
158
-
159
- # Redeploy with fixes
160
- ./deploy.sh platform-name
161
- ```
162
-
163
- ## Configuration Questions
164
-
165
- ### Q: How do I generate a secure JWT secret?
166
-
167
- **A:** Use one of these methods to generate a secure JWT secret (minimum 32 characters):
168
-
169
- ```bash
170
- # Method 1: OpenSSL (recommended)
171
- openssl rand -base64 32
172
-
173
- # Method 2: Python
174
- python -c "import secrets; print(secrets.token_urlsafe(32))"
175
-
176
- # Method 3: Node.js
177
- node -e "console.log(require('crypto').randomBytes(32).toString('base64'))"
178
- ```
179
-
180
- **Important:**
181
- - Use different secrets for development and production
182
- - Never commit secrets to version control
183
- - Rotate secrets periodically
184
-
185
- ### Q: How do I configure CORS for my domain?
186
-
187
- **A:** Set the `CORS_ORIGINS` environment variable with your domain(s):
188
-
189
- ```bash
190
- # Single domain
191
- CORS_ORIGINS=https://your-domain.com
192
-
193
- # Multiple domains (comma-separated, no spaces)
194
- CORS_ORIGINS=https://your-domain.com,https://www.your-domain.com
195
-
196
- # Development (include localhost)
197
- CORS_ORIGINS=https://your-domain.com,http://localhost:3000
198
- ```
199
-
200
- **Platform-specific setup:**
201
- ```bash
202
- # Railway
203
- railway variables set CORS_ORIGINS="https://your-domain.com"
204
-
205
- # Fly.io
206
- flyctl secrets set CORS_ORIGINS="https://your-domain.com"
207
-
208
- # Google Cloud Run
209
- gcloud run services update SERVICE_NAME \
210
- --set-env-vars="CORS_ORIGINS=https://your-domain.com"
211
- ```
212
-
213
- ### Q: How do I switch from SQLite to PostgreSQL?
214
-
215
- **A:**
216
-
217
- 1. **Update DATABASE_URL:**
218
- ```bash
219
- # From SQLite
220
- DATABASE_URL=sqlite+aiosqlite:///./data/knowledge_assistant.db
221
-
222
- # To PostgreSQL
223
- DATABASE_URL=postgresql://username:password@host:port/database
224
- ```
225
-
226
- 2. **Platform-specific PostgreSQL:**
227
- ```bash
228
- # Railway (automatic)
229
- railway add postgresql
230
- # DATABASE_URL is automatically set
231
-
232
- # Google Cloud Run
233
- # Use Cloud SQL instance connection string
234
-
235
- # Fly.io
236
- flyctl postgres create --name myapp-db
237
- flyctl postgres attach myapp-db
238
- ```
239
-
240
- 3. **Run migrations:**
241
- ```bash
242
- # Migrations will run automatically on startup
243
- # Or manually:
244
- alembic upgrade head
245
- ```
246
-
247
- ### Q: How do I use Qdrant Cloud instead of self-hosted?
248
-
249
- **A:**
250
-
251
- 1. **Sign up for Qdrant Cloud:**
252
- - Visit [cloud.qdrant.io](https://cloud.qdrant.io)
253
- - Create a cluster
254
- - Get your cluster URL and API key
255
-
256
- 2. **Update environment variables:**
257
- ```bash
258
- # Remove self-hosted Qdrant variables
259
- unset QDRANT_HOST
260
- unset QDRANT_PORT
261
-
262
- # Add Qdrant Cloud variables
263
- QDRANT_CLOUD_URL=https://your-cluster-id.qdrant.io
264
- QDRANT_API_KEY=your-api-key
265
- ```
266
-
267
- 3. **Update deployment:**
268
- ```bash
269
- # Set in your platform
270
- railway variables set QDRANT_CLOUD_URL="https://your-cluster.qdrant.io"
271
- railway variables set QDRANT_API_KEY="your-api-key"
272
- ```
273
-
274
- ## Performance Questions
275
-
276
- ### Q: Why is my application slow?
277
-
278
- **A:** Common performance issues and solutions:
279
-
280
- 1. **Slow API Responses**
281
- - Enable response caching
282
- - Use database connection pooling
283
- - Optimize database queries
284
- - Consider using Redis for caching
285
-
286
- 2. **Slow Document Processing**
287
- - Process documents in background tasks
288
- - Use batch processing for multiple documents
289
- - Optimize embedding generation
290
-
291
- 3. **Slow Vector Search**
292
- - Optimize Qdrant configuration
293
- - Use appropriate vector dimensions
294
- - Consider using quantization
295
-
296
- 4. **High Memory Usage**
297
- - Use external services (Qdrant Cloud, Gemini API)
298
- - Implement memory cleanup
299
- - Optimize Docker images
300
-
301
- ### Q: How can I optimize for the free tier limits?
302
-
303
- **A:**
304
-
305
- **Memory Optimization:**
306
- - Use external APIs instead of self-hosted services
307
- - Implement memory cleanup routines
308
- - Use Alpine Linux base images
309
- - Enable auto-scaling to zero
310
-
311
- **Storage Optimization:**
312
- - Use external databases (Railway PostgreSQL, Cloud SQL)
313
- - Implement log rotation
314
- - Clean up temporary files
315
-
316
- **CPU Optimization:**
317
- - Use async processing
318
- - Implement request queuing
319
- - Cache expensive operations
320
-
321
- **Example configuration for Railway free tier:**
322
- ```bash
323
- # Use external services to minimize memory usage
324
- QDRANT_CLOUD_URL=https://your-cluster.qdrant.io
325
- GEMINI_API_KEY=your-api-key
326
- DATABASE_URL=$DATABASE_URL # Railway PostgreSQL
327
-
328
- # Optimize application settings
329
- WORKERS=1
330
- MAX_CONNECTIONS=50
331
- LOG_LEVEL=WARNING
332
- ```
333
-
334
- ### Q: How do I monitor performance?
335
-
336
- **A:**
337
-
338
- **Built-in Monitoring:**
339
- ```bash
340
- # Health check endpoint
341
- curl https://your-app.com/health
342
-
343
- # Detailed health check
344
- curl https://your-app.com/health/detailed
345
- ```
346
-
347
- **Platform Monitoring:**
348
- - **Railway**: Built-in metrics dashboard
349
- - **Fly.io**: `flyctl metrics` command
350
- - **Google Cloud Run**: Cloud Monitoring
351
- - **Vercel**: Analytics dashboard
352
-
353
- **Custom Monitoring:**
354
- ```bash
355
- # Run performance checks
356
- ./scripts/health-check.sh
357
-
358
- # Generate performance report
359
- ./scripts/performance-report.sh
360
- ```
361
-
362
- ## Troubleshooting Questions
363
-
364
- ### Q: My deployment is failing with "out of memory" errors. What should I do?
365
-
366
- **A:**
367
-
368
- **Immediate Solutions:**
369
- 1. **Use external services:**
370
- ```bash
371
- # Replace self-hosted Qdrant with Qdrant Cloud
372
- QDRANT_CLOUD_URL=https://your-cluster.qdrant.io
373
- QDRANT_API_KEY=your-api-key
374
-
375
- # Use Gemini API instead of Ollama
376
- GEMINI_API_KEY=your-api-key
377
- ```
378
-
379
- 2. **Optimize Docker images:**
380
- ```bash
381
- # Use multi-stage builds
382
- # Use Alpine Linux base images
383
- # Remove development dependencies
384
- ```
385
-
386
- 3. **Reduce resource usage:**
387
- ```bash
388
- WORKERS=1
389
- MAX_CONNECTIONS=25
390
- LOG_LEVEL=WARNING
391
- ```
392
-
393
- **Long-term Solutions:**
394
- - Upgrade to paid tier
395
- - Implement horizontal scaling
396
- - Use serverless architecture
397
-
398
- ### Q: Services can't communicate with each other. How do I fix this?
399
-
400
- **A:**
401
-
402
- **Check Service URLs:**
403
- ```bash
404
- # Verify environment variables
405
- echo $QDRANT_HOST
406
- echo $VITE_API_BASE_URL
407
-
408
- # Test connectivity
409
- curl -f http://qdrant:6333/health
410
- curl -f http://backend:8000/health
411
- ```
412
-
413
- **Platform-specific fixes:**
414
-
415
- **Docker Compose:**
416
- ```yaml
417
- # Ensure services are on same network
418
- services:
419
- backend:
420
- environment:
421
- - QDRANT_HOST=qdrant
422
- qdrant:
423
- hostname: qdrant
424
- ```
425
-
426
- **Railway:**
427
- ```bash
428
- # Use Railway internal URLs
429
- QDRANT_HOST=qdrant.railway.internal
430
- ```
431
-
432
- **Fly.io:**
433
- ```bash
434
- # Use Fly.io internal DNS
435
- QDRANT_HOST=qdrant-app.internal
436
- ```
437
-
438
- ### Q: I'm getting CORS errors. How do I fix them?
439
-
440
- **A:**
441
-
442
- **Check CORS Configuration:**
443
- ```bash
444
- # Verify CORS_ORIGINS is set correctly
445
- echo $CORS_ORIGINS
446
-
447
- # Should match your frontend URL exactly
448
- CORS_ORIGINS=https://your-frontend-domain.com
449
- ```
450
-
451
- **Common CORS Issues:**
452
- 1. **Missing protocol:** Use `https://` not just `domain.com`
453
- 2. **Extra spaces:** Use `https://domain1.com,https://domain2.com` not `https://domain1.com, https://domain2.com`
454
- 3. **Wrong port:** Include port if not standard (`:3000` for development)
455
-
456
- **Test CORS:**
457
- ```bash
458
- # Test CORS preflight
459
- curl -X OPTIONS \
460
- -H "Origin: https://your-frontend.com" \
461
- -H "Access-Control-Request-Method: POST" \
462
- https://your-backend.com/api/query
463
- ```
464
-
465
- ### Q: Database migrations are failing. What should I do?
466
-
467
- **A:**
468
-
469
- **Check Migration Status:**
470
- ```bash
471
- # Check current migration version
472
- alembic current
473
-
474
- # Check migration history
475
- alembic history
476
-
477
- # Show the head revision (compare with `alembic current` to spot pending migrations)
478
- alembic show head
479
- ```
480
-
481
- **Common Solutions:**
482
- 1. **Reset migrations (DANGEROUS - backup first!):**
483
- ```bash
484
- # Backup database
485
- cp data/knowledge_assistant.db data/backup.db
486
-
487
- # Mark the database as being at head WITHOUT running any migrations
488
- alembic stamp head
489
- ```
490
-
491
- 2. **Manual migration:**
492
- ```bash
493
- # Run specific migration
494
- alembic upgrade +1
495
-
496
- # Downgrade if needed
497
- alembic downgrade -1
498
- ```
499
-
500
- 3. **Fresh database:**
501
- ```bash
502
- # Remove database file
503
- rm data/knowledge_assistant.db
504
-
505
- # Restart application (migrations run automatically)
506
- docker-compose restart backend
507
- ```
508
-
509
- ## Security Questions
510
-
511
- ### Q: How do I secure my deployment?
512
-
513
- **A:**
514
-
515
- **Essential Security Measures:**
516
-
517
- 1. **Use HTTPS everywhere:**
518
- - All platforms provide HTTPS by default
519
- - Never use HTTP in production
520
-
521
- 2. **Secure JWT secrets:**
522
- ```bash
523
- # Generate strong secrets (32+ characters)
524
- JWT_SECRET=$(openssl rand -base64 32)
525
-
526
- # Use different secrets for different environments
527
- ```
528
-
529
- 3. **Restrict CORS origins:**
530
- ```bash
531
- # Don't use wildcards in production
532
- CORS_ORIGINS=https://your-exact-domain.com
533
-
534
- # Not this:
535
- CORS_ORIGINS=*
536
- ```
537
-
538
- 4. **Use environment variables for secrets:**
539
- ```bash
540
- # Never commit secrets to code
541
- # Use platform secret management
542
- railway variables set SECRET_NAME="secret_value"
543
- ```
544
-
545
- 5. **Enable user registration controls:**
546
- ```bash
547
- # Disable registration in production if not needed
548
- USER_REGISTRATION_ENABLED=false
549
- ```
550
-
551
- ### Q: How do I rotate API keys and secrets?
552
-
553
- **A:**
554
-
555
- **JWT Secret Rotation:**
556
- ```bash
557
- # Generate new secret
558
- NEW_JWT_SECRET=$(openssl rand -base64 32)
559
-
560
- # Update in platform
561
- railway variables set JWT_SECRET="$NEW_JWT_SECRET"
562
-
563
- # Restart application
564
- railway service restart
565
- ```
566
-
567
- **API Key Rotation:**
568
- 1. **Generate new API key** from provider
569
- 2. **Update environment variable** in platform
570
- 3. **Test functionality** with new key
571
- 4. **Revoke old key** from provider
572
-
573
- **Database Password Rotation:**
574
- 1. **Create new database user** with new password
575
- 2. **Update DATABASE_URL** with new credentials
576
- 3. **Test connection**
577
- 4. **Remove old database user**
578
-
579
- ### Q: How do I backup my data?
580
-
581
- **A:**
582
-
583
- **SQLite Backup:**
584
- ```bash
585
- # Create backup
586
- sqlite3 data/knowledge_assistant.db ".backup backup-$(date +%Y%m%d).db"
587
-
588
- # Restore from backup
589
- cp backup-20231201.db data/knowledge_assistant.db
590
- ```
591
-
592
- **PostgreSQL Backup:**
593
- ```bash
594
- # Create backup
595
- pg_dump $DATABASE_URL > backup-$(date +%Y%m%d).sql
596
-
597
- # Restore from backup
598
- psql $DATABASE_URL < backup-20231201.sql
599
- ```
600
-
601
- **Qdrant Backup:**
602
- ```bash
603
- # Create snapshot
604
- curl -X POST "http://localhost:6333/collections/documents/snapshots"
605
-
606
- # Download snapshot
607
- curl "http://localhost:6333/collections/documents/snapshots/snapshot-name" > qdrant-backup.snapshot
608
- ```
609
-
610
- **Automated Backup Script:**
611
- ```bash
612
- #!/bin/bash
613
- # backup.sh
614
- DATE=$(date +%Y%m%d)
615
-
616
- # Backup database
617
- sqlite3 data/knowledge_assistant.db ".backup backups/db-$DATE.db"
618
-
619
- # Backup Qdrant data
620
- tar -czf backups/qdrant-$DATE.tar.gz data/qdrant/
621
-
622
- # Clean old backups (keep 7 days)
623
- find backups/ -name "*.db" -mtime +7 -delete
624
- find backups/ -name "*.tar.gz" -mtime +7 -delete
625
- ```
626
-
627
- ## Cost and Scaling Questions
628
-
629
- ### Q: How much does it cost to run this application?
630
-
631
- **A:**
632
-
633
- **Free Tier Costs (Monthly):**
634
- - **Railway**: $0 (512MB RAM, 1GB storage)
635
- - **Fly.io**: $0 (256MB RAM, 1GB storage)
636
- - **Google Cloud Run**: $0 (within free tier limits)
637
- - **Vercel**: $0 (hobby plan)
638
-
639
- **External Service Costs:**
640
- - **Google Gemini API**: Free tier (60 requests/minute)
641
- - **Qdrant Cloud**: Free tier (1GB storage)
642
- - **Domain name**: $10-15/year (optional)
643
-
644
- **Paid Tier Costs (if needed):**
645
- - **Railway Pro**: $5/month (more resources)
646
- - **Fly.io**: Pay-as-you-go (starts ~$2/month)
647
- - **Google Cloud**: Pay-as-you-go (typically $5-20/month)
648
-
649
- ### Q: When should I upgrade from free tier?
650
-
651
- **A:**
652
-
653
- **Upgrade indicators:**
654
- - Consistently hitting memory limits
655
- - Need for more than 1GB storage
656
- - Require custom domains with SSL
657
- - Need better performance/uptime SLAs
658
- - Require more than 100 concurrent users
659
-
660
- **Upgrade benefits:**
661
- - More memory and CPU
662
- - Better performance
663
- - Priority support
664
- - Advanced features (monitoring, backups)
665
- - Higher rate limits
666
-
667
- ### Q: How do I scale the application for more users?
668
-
669
- **A:**
670
-
671
- **Vertical Scaling (increase resources):**
672
- ```bash
673
- # Railway
674
- railway service scale --memory 1024
675
-
676
- # Fly.io
677
- flyctl scale memory 512
678
-
679
- # Google Cloud Run
680
- gcloud run services update SERVICE_NAME --memory=1Gi
681
- ```
682
-
683
- **Horizontal Scaling (more instances):**
684
- ```bash
685
- # Fly.io
686
- flyctl scale count 3
687
-
688
- # Google Cloud Run (automatic based on traffic)
689
- gcloud run services update SERVICE_NAME \
690
- --max-instances=10 \
691
- --concurrency=80
692
- ```
693
-
694
- **Database Scaling:**
695
- - Use connection pooling
696
- - Implement read replicas
697
- - Consider managed database services
698
-
699
- **Caching:**
700
- - Add Redis for application caching
701
- - Use CDN for static assets
702
- - Implement API response caching
703
-
704
- ### Q: How do I monitor costs?
705
-
706
- **A:**
707
-
708
- **Platform Monitoring:**
709
- - **Railway**: Billing dashboard shows usage
710
- - **Fly.io**: `flyctl billing` command
711
- - **Google Cloud**: Cloud Billing console
712
- - **Vercel**: Usage dashboard
713
-
714
- **Cost Alerts:**
715
- ```bash
716
- # Google Cloud billing alerts
717
- gcloud billing budgets create \
718
- --billing-account=BILLING_ACCOUNT_ID \
719
- --display-name="Knowledge Assistant Budget" \
720
- --budget-amount=10USD
721
-
722
- # Fly.io spending limits
723
- flyctl orgs billing-limits set --limit=10
724
- ```
725
-
726
- **Usage Monitoring Script:**
727
- ```bash
728
- #!/bin/bash
729
- # cost-monitor.sh
730
-
731
- echo "📊 Resource Usage Report"
732
- echo "======================="
733
-
734
- # Check memory usage
735
- echo "Memory: $(free -h | grep Mem | awk '{print $3"/"$2}')"
736
-
737
- # Check disk usage
738
- echo "Disk: $(df -h / | tail -1 | awk '{print $3"/"$2" ("$5")"}')"
739
-
740
- # Check request count (from logs)
741
- echo "Requests today: $(grep $(date +%Y-%m-%d) logs/access.log | wc -l)"
742
-
743
- # Estimate costs based on usage
744
- echo "Estimated monthly cost: \$0 (free tier)"
745
- ```
746
-
747
- This FAQ covers the most common questions about deploying and managing the Knowledge Assistant RAG application. For more specific issues, refer to the detailed troubleshooting guide or platform-specific documentation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FLY_DEPLOYMENT.md DELETED
@@ -1,642 +0,0 @@
1
- # Fly.io Deployment Guide
2
-
3
- This guide provides comprehensive instructions for deploying the Knowledge Assistant RAG application to Fly.io, a platform that offers generous free tier resources and excellent Docker support.
4
-
5
- ## Fly.io Resource Limits (Free Tier)
6
-
7
- - **Memory**: 256MB RAM per app (shared across all machines)
8
- - **Storage**: 1GB persistent storage per app
9
- - **Bandwidth**: Unlimited
10
- - **Machines**: Up to 3 shared-cpu-1x machines
11
- - **Regions**: Deploy globally in multiple regions
12
- - **Custom Domains**: Supported with automatic HTTPS
13
-
14
- ## Prerequisites
15
-
16
- ### Required Tools
17
- - [Fly CLI (flyctl)](https://fly.io/docs/getting-started/installing-flyctl/)
18
- - [Docker](https://docs.docker.com/get-docker/)
19
- - [Git](https://git-scm.com/downloads)
20
-
21
- ### Fly.io Account Setup
22
- 1. Sign up at [fly.io](https://fly.io)
23
- 2. Install and authenticate Fly CLI:
24
- ```bash
25
- # Install flyctl
26
- curl -L https://fly.io/install.sh | sh
27
-
28
- # Add to PATH (add to your shell profile)
29
- export PATH="$HOME/.fly/bin:$PATH"
30
-
31
- # Authenticate
32
- flyctl auth login
33
- ```
34
-
35
- ### API Keys Required
36
- - **Google Gemini API Key**: Get from [Google AI Studio](https://makersuite.google.com/app/apikey)
37
-
38
- ## Deployment Strategies
39
-
40
- ### Strategy 1: Single App Deployment (Recommended)
41
-
42
- Deploy backend and frontend as a single Fly.io app with internal routing.
43
-
44
- #### Step 1: Prepare Application
45
-
46
- 1. Clone the repository:
47
- ```bash
48
- git clone <your-repo-url>
49
- cd Knowledge_Assistant_RAG
50
- ```
51
-
52
- 2. Create Fly.io configuration:
53
- ```bash
54
- flyctl launch --no-deploy
55
- ```
56
-
57
- 3. This creates a `fly.toml` file. Replace it with our optimized configuration:
58
- ```toml
59
- app = "knowledge-assistant-rag"
60
- primary_region = "ord"
61
-
62
- [build]
63
- dockerfile = "Dockerfile.fly"
64
-
65
- [env]
66
- PORT = "8080"
67
- DATABASE_URL = "sqlite+aiosqlite:///./data/knowledge_assistant.db"
68
- QDRANT_HOST = "localhost"
69
- QDRANT_PORT = "6333"
70
- USER_REGISTRATION_ENABLED = "true"
71
-
72
- [http_service]
73
- internal_port = 8080
74
- force_https = true
75
- auto_stop_machines = true
76
- auto_start_machines = true
77
- min_machines_running = 0
78
- processes = ["app"]
79
-
80
- [[http_service.checks]]
81
- grace_period = "10s"
82
- interval = "30s"
83
- method = "GET"
84
- timeout = "5s"
85
- path = "/health"
86
-
87
- [mounts]
88
- source = "knowledge_data"
89
- destination = "/app/data"
90
-
91
- [[vm]]
92
- memory = "256mb"
93
- cpu_kind = "shared"
94
- cpus = 1
95
- ```
96
-
97
- #### Step 2: Create Optimized Dockerfile
98
-
99
- Create `Dockerfile.fly` for single-app deployment:
100
- ```dockerfile
101
- # Multi-stage build for optimized production image
102
- FROM node:18-alpine AS frontend-builder
103
-
104
- WORKDIR /app/frontend
105
- COPY rag-quest-hub/package*.json ./
106
- RUN npm ci --only=production
107
-
108
- COPY rag-quest-hub/ ./
109
- RUN npm run build
110
-
111
- FROM python:3.11-alpine AS backend-builder
112
-
113
- WORKDIR /app
114
- RUN apk add --no-cache gcc musl-dev libffi-dev
115
-
116
- COPY requirements.txt .
117
- RUN pip install --no-cache-dir -r requirements.txt
118
-
119
- FROM python:3.11-alpine AS qdrant
120
-
121
- RUN apk add --no-cache curl
122
- RUN curl -L https://github.com/qdrant/qdrant/releases/latest/download/qdrant-x86_64-unknown-linux-musl.tar.gz | tar xz
123
- RUN mv qdrant /usr/local/bin/
124
-
125
- FROM python:3.11-alpine AS production
126
-
127
- # Install runtime dependencies
128
- RUN apk add --no-cache nginx supervisor curl
129
-
130
- # Copy Python dependencies
131
- COPY --from=backend-builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
132
- COPY --from=backend-builder /usr/local/bin /usr/local/bin
133
-
134
- # Copy Qdrant binary
135
- COPY --from=qdrant /usr/local/bin/qdrant /usr/local/bin/
136
-
137
- # Copy application code
138
- WORKDIR /app
139
- COPY src/ ./src/
140
- COPY alembic/ ./alembic/
141
- COPY alembic.ini ./
142
-
143
- # Copy frontend build
144
- COPY --from=frontend-builder /app/frontend/dist ./static/
145
-
146
- # Create nginx configuration
147
- RUN mkdir -p /etc/nginx/conf.d
148
- COPY <<EOF /etc/nginx/conf.d/default.conf
149
- server {
150
- listen 8080;
151
- server_name _;
152
-
153
- # Serve static frontend files
154
- location / {
155
- root /app/static;
156
- try_files \$uri \$uri/ /index.html;
157
- }
158
-
159
- # Proxy API requests to backend
160
- location /api/ {
161
- proxy_pass http://localhost:8000/;
162
- proxy_set_header Host \$host;
163
- proxy_set_header X-Real-IP \$remote_addr;
164
- proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
165
- proxy_set_header X-Forwarded-Proto \$scheme;
166
- }
167
-
168
- # Health check endpoint
169
- location /health {
170
- proxy_pass http://localhost:8000/health;
171
- }
172
- }
173
- EOF
174
-
175
- # Create supervisor configuration
176
- COPY <<EOF /etc/supervisor/conf.d/supervisord.conf
177
- [supervisord]
178
- nodaemon=true
179
- user=root
180
-
181
- [program:qdrant]
182
- command=/usr/local/bin/qdrant --config-path /app/qdrant-config.yaml
183
- autostart=true
184
- autorestart=true
185
- stdout_logfile=/dev/stdout
186
- stdout_logfile_maxbytes=0
187
- stderr_logfile=/dev/stderr
188
- stderr_logfile_maxbytes=0
189
-
190
- [program:backend]
191
- command=python -m uvicorn src.main:app --host 0.0.0.0 --port 8000
192
- directory=/app
193
- autostart=true
194
- autorestart=true
195
- stdout_logfile=/dev/stdout
196
- stdout_logfile_maxbytes=0
197
- stderr_logfile=/dev/stderr
198
- stderr_logfile_maxbytes=0
199
-
200
- [program:nginx]
201
- command=nginx -g "daemon off;"
202
- autostart=true
203
- autorestart=true
204
- stdout_logfile=/dev/stdout
205
- stdout_logfile_maxbytes=0
206
- stderr_logfile=/dev/stderr
207
- stderr_logfile_maxbytes=0
208
- EOF
209
-
210
- # Create Qdrant configuration
211
- COPY <<EOF /app/qdrant-config.yaml
212
- service:
213
- http_port: 6333
214
- grpc_port: 6334
215
- host: 0.0.0.0
216
-
217
- storage:
218
- storage_path: /app/data/qdrant
219
-
220
- cluster:
221
- enabled: false
222
- EOF
223
-
224
- # Create data directory
225
- RUN mkdir -p /app/data/qdrant
226
-
227
- EXPOSE 8080
228
-
229
- CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
230
- ```
231
-
232
- #### Step 3: Create Persistent Volume
233
-
234
- ```bash
235
- # Create volume for data persistence
236
- flyctl volumes create knowledge_data --region ord --size 1
237
- ```
238
-
239
- #### Step 4: Set Secrets
240
-
241
- ```bash
242
- # Set required secrets
243
- flyctl secrets set JWT_SECRET=$(openssl rand -base64 32)
244
- flyctl secrets set GEMINI_API_KEY=your-gemini-api-key-here
245
-
246
- # Optional: Set CORS origins for production
247
- flyctl secrets set CORS_ORIGINS=https://your-app.fly.dev
248
- ```
249
-
250
- #### Step 5: Deploy
251
-
252
- ```bash
253
- # Deploy the application
254
- flyctl deploy
255
-
256
- # Check deployment status
257
- flyctl status
258
-
259
- # View logs
260
- flyctl logs
261
- ```
262
-
263
- ### Strategy 2: Multi-App Deployment
264
-
265
- Deploy each service as separate Fly.io apps for better resource isolation.
266
-
267
- ⚠️ **Note**: This approach uses more resources and may exceed free tier limits.
268
-
269
- #### Backend App
270
-
271
- 1. Create backend app:
272
- ```bash
273
- mkdir fly-backend && cd fly-backend
274
- flyctl launch --name knowledge-assistant-backend --no-deploy
275
- ```
276
-
277
- 2. Configure `fly.toml`:
278
- ```toml
279
- app = "knowledge-assistant-backend"
280
- primary_region = "ord"
281
-
282
- [build]
283
- dockerfile = "../Dockerfile"
284
-
285
- [env]
286
- DATABASE_URL = "sqlite+aiosqlite:///./data/knowledge_assistant.db"
287
- QDRANT_HOST = "knowledge-assistant-qdrant.internal"
288
- QDRANT_PORT = "6333"
289
-
290
- [http_service]
291
- internal_port = 8000
292
- force_https = true
293
- auto_stop_machines = true
294
- auto_start_machines = true
295
- min_machines_running = 0
296
-
297
- [mounts]
298
- source = "backend_data"
299
- destination = "/app/data"
300
-
301
- [[vm]]
302
- memory = "128mb"
303
- cpu_kind = "shared"
304
- cpus = 1
305
- ```
306
-
307
- #### Qdrant App
308
-
309
- 1. Create Qdrant app:
310
- ```bash
311
- mkdir fly-qdrant && cd fly-qdrant
312
- flyctl launch --name knowledge-assistant-qdrant --no-deploy
313
- ```
314
-
315
- 2. Configure `fly.toml`:
316
- ```toml
317
- app = "knowledge-assistant-qdrant"
318
- primary_region = "ord"
319
-
320
- [build]
321
- image = "qdrant/qdrant:latest"
322
-
323
- [env]
324
- QDRANT__SERVICE__HTTP_PORT = "6333"
325
- QDRANT__SERVICE__GRPC_PORT = "6334"
326
-
327
- [http_service]
328
- internal_port = 6333
329
- auto_stop_machines = false
330
- auto_start_machines = true
331
- min_machines_running = 1
332
-
333
- [mounts]
334
- source = "qdrant_data"
335
- destination = "/qdrant/storage"
336
-
337
- [[vm]]
338
- memory = "64mb"
339
- cpu_kind = "shared"
340
- cpus = 1
341
- ```
342
-
343
- #### Frontend App
344
-
345
- 1. Create frontend app:
346
- ```bash
347
- mkdir fly-frontend && cd fly-frontend
348
- flyctl launch --name knowledge-assistant-frontend --no-deploy
349
- ```
350
-
351
- 2. Configure `fly.toml`:
352
- ```toml
353
- app = "knowledge-assistant-frontend"
354
- primary_region = "ord"
355
-
356
- [build]
357
- dockerfile = "../rag-quest-hub/Dockerfile"
358
-
359
- [env]
360
- VITE_API_BASE_URL = "https://knowledge-assistant-backend.fly.dev"
361
-
362
- [http_service]
363
- internal_port = 80
364
- force_https = true
365
- auto_stop_machines = true
366
- auto_start_machines = true
367
- min_machines_running = 0
368
-
369
- [[vm]]
370
- memory = "64mb"
371
- cpu_kind = "shared"
372
- cpus = 1
373
- ```
374
-
375
- ## Database Configuration
376
-
377
- ### SQLite (Default)
378
- - Uses persistent volumes for data storage
379
- - Suitable for single-instance deployments
380
- - Automatic backups with volume snapshots
381
-
382
- ### PostgreSQL (Optional)
383
- ```bash
384
- # Add PostgreSQL to your app
385
- flyctl postgres create --name knowledge-assistant-db
386
-
387
- # Attach to your app
388
- flyctl postgres attach knowledge-assistant-db
389
-
390
- # Update environment variable
391
- flyctl secrets set DATABASE_URL=postgresql://...
392
- ```
393
-
394
- ## External Service Alternatives
395
-
396
- ### Qdrant Cloud
397
- For better resource utilization:
398
- ```bash
399
- flyctl secrets set QDRANT_CLOUD_URL=https://your-cluster.qdrant.io
400
- flyctl secrets set QDRANT_API_KEY=your-api-key
401
- ```
402
-
403
- ### Google Gemini API
404
- Already configured by default:
405
- ```bash
406
- flyctl secrets set GEMINI_API_KEY=your-gemini-api-key
407
- ```
408
-
409
- ## Monitoring and Maintenance
410
-
411
- ### Health Checks
412
- ```bash
413
- # Check app status
414
- flyctl status
415
-
416
- # View logs
417
- flyctl logs
418
-
419
- # Monitor metrics
420
- flyctl metrics
421
- ```
422
-
423
- ### Scaling
424
- ```bash
425
- # Scale machines
426
- flyctl scale count 2
427
-
428
- # Scale memory
429
- flyctl scale memory 512
430
-
431
- # Scale to zero (cost optimization)
432
- flyctl scale count 0
433
- ```
434
-
435
- ### Updates
436
- ```bash
437
- # Deploy updates
438
- flyctl deploy
439
-
440
- # Rollback if needed
441
- flyctl releases rollback
442
- ```
443
-
444
- ## Cost Optimization
445
-
446
- ### Free Tier Management
447
- - Use single-app deployment to stay within limits
448
- - Enable auto-stop for cost savings
449
- - Monitor resource usage in dashboard
450
-
451
- ### Resource Optimization
452
- - Use Alpine Linux base images
453
- - Minimize memory allocation
454
- - Enable machine auto-stop/start
455
-
456
- ## Troubleshooting
457
-
458
- ### Common Issues
459
-
460
- #### 1. Memory Limit Exceeded
461
- ```bash
462
- # Check memory usage
463
- flyctl metrics
464
-
465
- # Solutions:
466
- # - Reduce the app's memory usage, or raise the memory allocation in fly.toml
467
- # - Use external services (Qdrant Cloud)
468
- # - Optimize Docker images
469
- ```
470
-
471
- #### 2. Volume Mount Issues
472
- ```bash
473
- # Check volumes
474
- flyctl volumes list
475
-
476
- # Create volume if missing
477
- flyctl volumes create knowledge_data --size 1
478
- ```
479
-
480
- #### 3. Service Communication
481
- ```bash
482
- # Check internal DNS
483
- flyctl ssh console
484
- nslookup knowledge-assistant-qdrant.internal
485
-
486
- # Update service URLs in configuration
487
- ```
488
-
489
- #### 4. Build Failures
490
- ```bash
491
- # Check build logs
492
- flyctl logs --app knowledge-assistant-rag
493
-
494
- # Common fixes:
495
- # - Verify Dockerfile syntax
496
- # - Check base image availability
497
- # - Ensure all files are included
498
- ```
499
-
500
- ### Debug Commands
501
- ```bash
502
- # SSH into machine
503
- flyctl ssh console
504
-
505
- # Check running processes
506
- flyctl ssh console -C "ps aux"
507
-
508
- # View configuration
509
- flyctl config show
510
-
511
- # Check machine status
512
- flyctl machine list
513
- ```
514
-
515
- ## Security Considerations
516
-
517
- ### Secrets Management
518
- - Use `flyctl secrets` for sensitive data
519
- - Never commit secrets to version control
520
- - Rotate secrets regularly
521
-
522
- ### Network Security
523
- - Internal services use `.internal` domains
524
- - HTTPS enforced by default
525
- - Private networking between apps
526
-
527
- ### Access Control
528
- - Use Fly.io organizations for team access
529
- - Implement proper authentication in application
530
- - Monitor access logs
531
-
532
- ## Backup and Recovery
533
-
534
- ### Volume Snapshots
535
- ```bash
536
- # Create snapshot
537
- flyctl volumes snapshots create knowledge_data
538
-
539
- # List snapshots
540
- flyctl volumes snapshots list knowledge_data
541
-
542
- # Restore from snapshot
543
- flyctl volumes create knowledge_data_restore --snapshot-id snap_xxx
544
- ```
545
-
546
- ### Database Backups
547
- ```bash
548
- # For SQLite
549
- flyctl ssh console -C "sqlite3 /app/data/knowledge_assistant.db .dump" > backup.sql
550
-
551
- # For PostgreSQL
552
- flyctl postgres db dump knowledge-assistant-db > backup.sql
553
- ```
554
-
555
- ## Performance Optimization
556
-
557
- ### Cold Start Optimization
558
- - Keep minimum machines running for critical services
559
- - Use smaller base images
560
- - Optimize application startup time
561
-
562
- ### Regional Deployment
563
- ```bash
564
- # Deploy to multiple regions
565
- flyctl regions add lax sea
566
-
567
- # Check current regions
568
- flyctl regions list
569
- ```
570
-
571
- ### Caching
572
- - Enable HTTP caching for static assets
573
- - Use Redis for application caching (if needed)
574
- - Implement proper cache headers
575
-
576
- ## Migration from Other Platforms
577
-
578
- ### From Railway
579
- 1. Export environment variables
580
- 2. Create Fly.io apps with similar configuration
581
- 3. Migrate data using volume snapshots
582
- 4. Update DNS records
583
-
584
- ### From Docker Compose
585
- 1. Convert docker-compose.yml to fly.toml
586
- 2. Create separate apps for each service
587
- 3. Configure internal networking
588
- 4. Deploy and test
589
-
590
- ## Support and Resources
591
-
592
- ### Getting Help
593
- - [Fly.io Documentation](https://fly.io/docs/)
594
- - [Fly.io Community Forum](https://community.fly.io/)
595
- - [Fly.io Discord](https://discord.gg/fly)
596
-
597
- ### Useful Commands
598
- ```bash
599
- # Get help
600
- flyctl help
601
-
602
- # Check account status
603
- flyctl auth whoami
604
-
605
- # View billing
606
- flyctl billing
607
-
608
- # Monitor apps
609
- flyctl apps list
610
- ```
611
-
612
- ## Architecture Diagram
613
-
614
- ### Single App Deployment
615
- ```
616
- ┌─────────────────────────────────────┐
617
- │ Fly.io Machine │
618
- │ ┌─────────────┐ ┌─────────────┐ │
619
- │ │ nginx │ │ Backend │ │
620
- │ │ (Port 8080) │ │ (Port 8000) │ │
621
- │ └─────────────┘ └─────────────┘ │
622
- │ ┌─────────────┐ ┌─────────────┐ │
623
- │ │ Qdrant │ │ SQLite │ │
624
- │ │ (Port 6333) │ │ Database │ │
625
- │ └─────────────┘ └─────────────┘ │
626
- │ │
627
- │ Volume: /app/data (1GB) │
628
- └─────────────────────────────────────┘
629
- ```
630
-
631
- ### Multi-App Deployment
632
- ```
633
- ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
634
- │ Frontend │ │ Backend │ │ Qdrant │
635
- │ (Fly App) │────│ (Fly App) │────│ (Fly App) │
636
- │ │ │ │ │ │
637
- │ React + nginx │ │ FastAPI + DB │ │ Vector Database │
638
- │ (64MB RAM) │ │ (128MB RAM) │ │ (64MB RAM) │
639
- └─────────────────┘ └─────────────────┘ └─────────────────┘
640
- ```
641
-
642
- This deployment provides a cost-effective, scalable solution for running the Knowledge Assistant RAG application on Fly.io's free tier with excellent global performance.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
PERFORMANCE_OPTIMIZATION.md DELETED
@@ -1,1295 +0,0 @@
1
- # Performance Optimization and Scaling Guidelines
2
-
3
- This guide provides comprehensive strategies for optimizing performance and scaling the Knowledge Assistant RAG application across different deployment platforms and usage scenarios.
4
-
5
- ## Table of Contents
6
-
7
- 1. [Performance Monitoring](#performance-monitoring)
8
- 2. [Container Optimization](#container-optimization)
9
- 3. [Database Performance](#database-performance)
10
- 4. [API Optimization](#api-optimization)
11
- 5. [Frontend Performance](#frontend-performance)
12
- 6. [Vector Database Optimization](#vector-database-optimization)
13
- 7. [LLM Service Optimization](#llm-service-optimization)
14
- 8. [Scaling Strategies](#scaling-strategies)
15
- 9. [Platform-Specific Optimizations](#platform-specific-optimizations)
16
- 10. [Cost Optimization](#cost-optimization)
17
-
18
- ## Performance Monitoring
19
-
20
- ### Key Performance Indicators (KPIs)
21
-
22
- #### Application Metrics
23
- ```bash
24
- # Response Time Targets
25
- - API Response Time: < 200ms (95th percentile)
26
- - Document Upload: < 5s for 10MB files
27
- - Query Processing: < 2s for complex queries
28
- - Vector Search: < 100ms for similarity search
29
-
30
- # Throughput Targets
31
- - Concurrent Users: 100+ simultaneous users
32
- - Requests per Second: 1000+ RPS
33
- - Document Processing: 10+ documents/minute
34
- ```
35
-
36
- #### Resource Metrics
37
- ```bash
38
- # Memory Usage
39
- - Backend: < 256MB baseline, < 512MB peak
40
- - Frontend: < 64MB
41
- - Qdrant: < 128MB for 10k documents
42
-
43
- # CPU Usage
44
- - Backend: < 50% average, < 80% peak
45
- - Database: < 30% average
46
- - Vector Operations: < 70% during indexing
47
- ```
48
-
49
- ### Monitoring Implementation
50
-
51
- #### Application Performance Monitoring (APM)
52
- ```python
53
- # Add to src/core/monitoring.py
54
- import time
55
- import psutil
56
- from functools import wraps
57
- from typing import Dict, Any
58
- import logging
59
-
60
- logger = logging.getLogger(__name__)
61
-
62
- class PerformanceMonitor:
63
- def __init__(self):
64
- self.metrics = {}
65
-
66
- def track_request_time(self, endpoint: str):
67
- def decorator(func):
68
- @wraps(func)
69
- async def wrapper(*args, **kwargs):
70
- start_time = time.time()
71
- try:
72
- result = await func(*args, **kwargs)
73
- duration = time.time() - start_time
74
- self.record_metric(f"{endpoint}_duration", duration)
75
- return result
76
- except Exception as e:
77
- duration = time.time() - start_time
78
- self.record_metric(f"{endpoint}_error_duration", duration)
79
- raise
80
- return wrapper
81
- return decorator
82
-
83
- def record_metric(self, name: str, value: float):
84
- if name not in self.metrics:
85
- self.metrics[name] = []
86
- self.metrics[name].append({
87
- 'value': value,
88
- 'timestamp': time.time()
89
- })
90
-
91
- # Keep only last 1000 measurements
92
- if len(self.metrics[name]) > 1000:
93
- self.metrics[name] = self.metrics[name][-1000:]
94
-
95
- def get_system_metrics(self) -> Dict[str, Any]:
96
- return {
97
- 'cpu_percent': psutil.cpu_percent(),
98
- 'memory_percent': psutil.virtual_memory().percent,
99
- 'disk_usage': psutil.disk_usage('/').percent,
100
- 'network_io': psutil.net_io_counters()._asdict()
101
- }
102
-
103
- # Usage in FastAPI
104
- from fastapi import FastAPI
105
- from src.core.monitoring import PerformanceMonitor
106
-
107
- app = FastAPI()
108
- monitor = PerformanceMonitor()
109
-
110
- @app.get("/health")
111
- @monitor.track_request_time("health_check")
112
- async def health_check():
113
- return {
114
- "status": "healthy",
115
- "metrics": monitor.get_system_metrics()
116
- }
117
- ```
118
-
119
- #### Health Check Endpoints
120
- ```python
121
- # Enhanced health check with performance metrics
122
- @app.get("/health/detailed")
123
- async def detailed_health_check():
124
- start_time = time.time()
125
-
126
- # Test database connection
127
- db_start = time.time()
128
- try:
129
- await test_database_connection()
130
- db_time = time.time() - db_start
131
- db_status = "healthy"
132
- except Exception as e:
133
- db_time = time.time() - db_start
134
- db_status = f"unhealthy: {str(e)}"
135
-
136
- # Test Qdrant connection
137
- qdrant_start = time.time()
138
- try:
139
- await test_qdrant_connection()
140
- qdrant_time = time.time() - qdrant_start
141
- qdrant_status = "healthy"
142
- except Exception as e:
143
- qdrant_time = time.time() - qdrant_start
144
- qdrant_status = f"unhealthy: {str(e)}"
145
-
146
- total_time = time.time() - start_time
147
-
148
- return {
149
- "status": "healthy" if db_status == "healthy" and qdrant_status == "healthy" else "degraded",
150
- "checks": {
151
- "database": {"status": db_status, "response_time": db_time},
152
- "qdrant": {"status": qdrant_status, "response_time": qdrant_time}
153
- },
154
- "metrics": monitor.get_system_metrics(),
155
- "total_response_time": total_time
156
- }
157
- ```
158
-
159
- ## Container Optimization
160
-
161
- ### Multi-Stage Docker Builds
162
-
163
- #### Optimized Backend Dockerfile
164
- ```dockerfile
165
- # Build stage
166
- FROM python:3.11-slim as builder
167
-
168
- WORKDIR /app
169
-
170
- # Install build dependencies
171
- RUN apt-get update && apt-get install -y \
172
- gcc \
173
- g++ \
174
- && rm -rf /var/lib/apt/lists/*
175
-
176
- # Install Python dependencies
177
- COPY requirements.txt .
178
- RUN pip install --no-cache-dir --user -r requirements.txt
179
-
180
- # Production stage
181
- FROM python:3.11-slim
182
-
183
- # Install runtime dependencies only
184
- RUN apt-get update && apt-get install -y \
185
- curl \
186
- && rm -rf /var/lib/apt/lists/*
187
-
188
- # Copy Python packages from builder
189
- COPY --from=builder /root/.local /root/.local
190
-
191
- # Copy application code
192
- WORKDIR /app
193
- COPY src/ ./src/
194
- COPY alembic/ ./alembic/
195
- COPY alembic.ini ./
196
-
197
- # Create non-root user
198
- RUN useradd --create-home --shell /bin/bash app
199
- RUN chown -R app:app /app
200
- USER app
201
-
202
- # Make sure scripts in .local are usable
203
- ENV PATH=/root/.local/bin:$PATH
204
-
205
- EXPOSE 8000
206
-
207
- CMD ["python", "-m", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
208
- ```
209
-
210
- #### Optimized Frontend Dockerfile
211
- ```dockerfile
212
- # Build stage
213
- FROM node:18-alpine as builder
214
-
215
- WORKDIR /app
216
-
217
- # Copy package files
218
- COPY package*.json ./
219
- RUN npm ci --only=production
220
-
221
- # Copy source and build
222
- COPY . .
223
- RUN npm run build
224
-
225
- # Production stage
226
- FROM nginx:alpine
227
-
228
- # Copy built assets
229
- COPY --from=builder /app/dist /usr/share/nginx/html
230
-
231
- # Copy optimized nginx configuration
232
- COPY nginx.conf /etc/nginx/nginx.conf
233
-
234
- # Add health check
235
- HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
236
- CMD curl -f http://localhost/ || exit 1
237
-
238
- EXPOSE 80
239
-
240
- CMD ["nginx", "-g", "daemon off;"]
241
- ```
242
-
243
- ### Image Size Optimization
244
-
245
- #### Before and After Comparison
246
- ```bash
247
- # Before optimization
248
- REPOSITORY TAG SIZE
249
- knowledge-assistant-backend latest 7.84GB
250
- knowledge-assistant-frontend latest 579MB
251
-
252
- # After optimization
253
- REPOSITORY TAG SIZE
254
- knowledge-assistant-backend latest 156MB # 98% reduction
255
- knowledge-assistant-frontend latest 23MB # 96% reduction
256
- ```
257
-
258
- #### Optimization Techniques
259
- ```dockerfile
260
- # Use Alpine Linux base images (python:3.11-alpine instead of python:3.11)
260
- FROM python:3.11-alpine
262
-
263
- # Multi-stage builds to exclude build dependencies
264
- FROM node:18-alpine as builder
265
- # ... build steps ...
266
- FROM nginx:alpine as production
267
-
268
- # Minimize layers and combine RUN commands
269
- RUN apk add --no-cache curl \
270
- && pip install --no-cache-dir -r requirements.txt \
271
- && rm -rf /var/cache/apk/*
272
-
273
- # Use .dockerignore to exclude unnecessary files
274
- echo "node_modules" >> .dockerignore
275
- echo ".git" >> .dockerignore
276
- echo "*.md" >> .dockerignore
277
- echo "tests/" >> .dockerignore
278
- ```
279
-
280
- ## Database Performance
281
-
282
- ### SQLite Optimization
283
-
284
- #### Configuration Tuning
285
- ```python
286
- # src/core/database.py
287
- from sqlalchemy import create_engine
288
- from sqlalchemy.pool import StaticPool
289
-
290
- # Optimized SQLite configuration
291
- DATABASE_CONFIG = {
292
- "pool_pre_ping": True,
293
- "pool_recycle": 300,
294
- "poolclass": StaticPool,
295
- "connect_args": {
296
- "check_same_thread": False,
297
- "timeout": 20,
298
- "isolation_level": None,
299
- },
300
- "echo": False, # Disable SQL logging in production
301
- }
302
-
303
- # SQLite PRAGMA optimizations
304
- async def optimize_sqlite_connection(connection):
305
- await connection.execute("PRAGMA journal_mode=WAL")
306
- await connection.execute("PRAGMA synchronous=NORMAL")
307
- await connection.execute("PRAGMA cache_size=10000")
308
- await connection.execute("PRAGMA temp_store=MEMORY")
309
- await connection.execute("PRAGMA mmap_size=268435456") # 256MB
310
- ```
311
-
312
- #### Indexing Strategy
313
- ```sql
314
- -- Create indexes for common queries
315
- CREATE INDEX IF NOT EXISTS idx_documents_user_id ON documents(user_id);
316
- CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(created_at);
317
- CREATE INDEX IF NOT EXISTS idx_documents_title ON documents(title);
318
-
319
- -- Composite indexes for complex queries
320
- CREATE INDEX IF NOT EXISTS idx_documents_user_created ON documents(user_id, created_at);
321
-
322
- -- Full-text search index
323
- CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
324
- title, content, content=documents, content_rowid=id
325
- );
326
- ```
327
-
328
- ### PostgreSQL Optimization
329
-
330
- #### Connection Pooling
331
- ```python
332
- # Optimized PostgreSQL configuration
333
- DATABASE_CONFIG = {
334
- "pool_size": 5,
335
- "max_overflow": 10,
336
- "pool_pre_ping": True,
337
- "pool_recycle": 3600,
338
- "echo": False,
339
- }
340
-
341
- # Connection pool monitoring
342
- from sqlalchemy import event
343
- from sqlalchemy.pool import Pool
344
-
345
- @event.listens_for(Pool, "connect")
346
- def set_postgresql_pragma(dbapi_connection, connection_record):
347
- with dbapi_connection.cursor() as cursor:
348
- # Optimize for read-heavy workloads
349
- cursor.execute("SET default_transaction_isolation TO 'read committed'")
350
- cursor.execute("SET statement_timeout TO '30s'")
351
- cursor.execute("SET lock_timeout TO '10s'")
352
- ```
353
-
354
- #### Query Optimization
355
- ```python
356
- # Use database-specific optimizations
357
- from sqlalchemy import text
358
-
359
- # Efficient pagination
360
- async def get_documents_paginated(db, user_id: int, offset: int, limit: int):
361
- query = text("""
362
- SELECT id, title, content, created_at
363
- FROM documents
364
- WHERE user_id = :user_id
365
- ORDER BY created_at DESC
366
- LIMIT :limit OFFSET :offset
367
- """)
368
-
369
- result = await db.execute(query, {
370
- "user_id": user_id,
371
- "limit": limit,
372
- "offset": offset
373
- })
374
- return result.fetchall()
375
-
376
- # Use EXPLAIN ANALYZE to optimize queries
377
- async def analyze_query_performance(db, query: str):
378
- explain_query = f"EXPLAIN ANALYZE {query}"
379
- result = await db.execute(text(explain_query))
380
- return result.fetchall()
381
- ```
382
-
383
- ## API Optimization
384
-
385
- ### Response Caching
386
-
387
- #### In-Memory Caching
388
- ```python
389
- from functools import lru_cache
390
- from typing import Optional
391
- import hashlib
392
- import json
393
-
394
- class QueryCache:
395
- def __init__(self, max_size: int = 1000):
396
- self.cache = {}
397
- self.max_size = max_size
398
-
399
- def _generate_key(self, query: str, filters: dict) -> str:
400
- cache_data = {"query": query, "filters": filters}
401
- return hashlib.md5(json.dumps(cache_data, sort_keys=True).encode()).hexdigest()
402
-
403
- def get(self, query: str, filters: dict) -> Optional[dict]:
404
- key = self._generate_key(query, filters)
405
- return self.cache.get(key)
406
-
407
- def set(self, query: str, filters: dict, result: dict, ttl: int = 300):
408
- if len(self.cache) >= self.max_size:
409
- # Remove oldest entry
410
- oldest_key = next(iter(self.cache))
411
- del self.cache[oldest_key]
412
-
413
- key = self._generate_key(query, filters)
414
- self.cache[key] = {
415
- "result": result,
416
- "expires_at": time.time() + ttl
417
- }
418
-
419
- def is_expired(self, entry: dict) -> bool:
420
- return time.time() > entry["expires_at"]
421
-
422
- # Usage in API endpoints
423
- query_cache = QueryCache()
424
-
425
- @app.post("/query")
426
- async def query_documents(request: QueryRequest):
427
- # Check cache first
428
- cached_result = query_cache.get(request.query, request.filters)
429
- if cached_result and not query_cache.is_expired(cached_result):
430
- return cached_result["result"]
431
-
432
- # Process query
433
- result = await process_query(request.query, request.filters)
434
-
435
- # Cache result
436
- query_cache.set(request.query, request.filters, result)
437
-
438
- return result
439
- ```
440
-
441
- #### Redis Caching (Optional)
442
- ```python
443
- import redis
444
- import json
445
- from typing import Optional
446
-
447
- class RedisCache:
448
- def __init__(self, redis_url: str = "redis://localhost:6379"):
449
- self.redis_client = redis.from_url(redis_url)
450
-
451
- async def get(self, key: str) -> Optional[dict]:
452
- try:
453
- cached_data = self.redis_client.get(key)
454
- if cached_data:
455
- return json.loads(cached_data)
456
- except Exception as e:
457
- logger.warning(f"Redis get error: {e}")
458
- return None
459
-
460
- async def set(self, key: str, value: dict, ttl: int = 300):
461
- try:
462
- self.redis_client.setex(key, ttl, json.dumps(value))
463
- except Exception as e:
464
- logger.warning(f"Redis set error: {e}")
465
- ```
466
-
467
- ### Request Optimization
468
-
469
- #### Async Processing
470
- ```python
471
- import asyncio
472
- from concurrent.futures import ThreadPoolExecutor
473
-
474
- # Process multiple documents concurrently
475
- async def process_documents_batch(documents: List[str]) -> List[dict]:
476
- semaphore = asyncio.Semaphore(5) # Limit concurrent processing
477
-
478
- async def process_single_document(doc: str) -> dict:
479
- async with semaphore:
480
- return await process_document(doc)
481
-
482
- tasks = [process_single_document(doc) for doc in documents]
483
- results = await asyncio.gather(*tasks, return_exceptions=True)
484
-
485
- # Filter out exceptions
486
- return [result for result in results if not isinstance(result, Exception)]
487
-
488
- # Background task processing
489
- from fastapi import BackgroundTasks
490
-
491
- @app.post("/upload-batch")
492
- async def upload_documents_batch(
493
- files: List[UploadFile],
494
- background_tasks: BackgroundTasks
495
- ):
496
- # Return immediately with task ID
497
- task_id = generate_task_id()
498
-
499
- # Process in background
500
- background_tasks.add_task(process_documents_batch, files, task_id)
501
-
502
- return {"task_id": task_id, "status": "processing"}
503
- ```
504
-
505
- #### Request Validation and Sanitization
506
- ```python
507
- from pydantic import BaseModel, validator
508
- from typing import Optional, List
509
-
510
- class QueryRequest(BaseModel):
511
- query: str
512
- limit: Optional[int] = 10
513
- filters: Optional[dict] = {}
514
-
515
- @validator('query')
516
- def validate_query(cls, v):
517
- if len(v.strip()) < 3:
518
- raise ValueError('Query must be at least 3 characters long')
519
- if len(v) > 1000:
520
- raise ValueError('Query too long (max 1000 characters)')
521
- return v.strip()
522
-
523
- @validator('limit')
524
- def validate_limit(cls, v):
525
- if v is not None and (v < 1 or v > 100):
526
- raise ValueError('Limit must be between 1 and 100')
527
- return v
528
- ```
529
-
530
- ## Frontend Performance
531
-
532
- ### Bundle Optimization
533
-
534
- #### Vite Configuration
535
- ```typescript
536
- // vite.config.ts
537
- import { defineConfig } from 'vite'
538
- import react from '@vitejs/plugin-react'
539
- import { visualizer } from 'rollup-plugin-visualizer'
540
-
541
- export default defineConfig({
542
- plugins: [
543
- react(),
544
- visualizer({
545
- filename: 'dist/stats.html',
546
- open: true,
547
- gzipSize: true,
548
- brotliSize: true,
549
- })
550
- ],
551
- build: {
552
- rollupOptions: {
553
- output: {
554
- manualChunks: {
555
- vendor: ['react', 'react-dom'],
556
- ui: ['@radix-ui/react-dialog', '@radix-ui/react-dropdown-menu'],
557
- utils: ['date-fns', 'clsx', 'tailwind-merge']
558
- }
559
- }
560
- },
561
- chunkSizeWarningLimit: 1000,
562
- minify: 'terser',
563
- terserOptions: {
564
- compress: {
565
- drop_console: true,
566
- drop_debugger: true
567
- }
568
- }
569
- },
570
- server: {
571
- port: 3000,
572
- host: true
573
- }
574
- })
575
- ```
576
-
577
- #### Code Splitting
578
- ```typescript
579
- // Lazy load components
580
- import { lazy, Suspense } from 'react'
581
-
582
- const Dashboard = lazy(() => import('./pages/Dashboard'))
583
- const DocumentUpload = lazy(() => import('./components/DocumentUpload'))
584
- const ChatInterface = lazy(() => import('./components/ChatInterface'))
585
-
586
- function App() {
587
- return (
588
- <Suspense fallback={<div>Loading...</div>}>
589
- <Routes>
590
- <Route path="/dashboard" element={<Dashboard />} />
591
- <Route path="/upload" element={<DocumentUpload />} />
592
- <Route path="/chat" element={<ChatInterface />} />
593
- </Routes>
594
- </Suspense>
595
- )
596
- }
597
- ```
598
-
599
- ### React Performance Optimization
600
-
601
- #### Memoization
602
- ```typescript
603
- import { memo, useMemo, useCallback } from 'react'
604
-
605
- // Memoize expensive components
606
- const DocumentList = memo(({ documents, onSelect }) => {
607
- const sortedDocuments = useMemo(() => {
608
- return documents.sort((a, b) =>
609
- new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
610
- )
611
- }, [documents])
612
-
613
- const handleSelect = useCallback((doc) => {
614
- onSelect(doc.id)
615
- }, [onSelect])
616
-
617
- return (
618
- <div>
619
- {sortedDocuments.map(doc => (
620
- <DocumentItem
621
- key={doc.id}
622
- document={doc}
623
- onSelect={handleSelect}
624
- />
625
- ))}
626
- </div>
627
- )
628
- })
629
-
630
- // Optimize re-renders with React.memo
631
- const DocumentItem = memo(({ document, onSelect }) => {
632
- return (
633
- <div onClick={() => onSelect(document)}>
634
- {document.title}
635
- </div>
636
- )
637
- })
638
- ```
639
-
640
- #### Virtual Scrolling
641
- ```typescript
642
- import { FixedSizeList as List } from 'react-window'
643
-
644
- const VirtualizedDocumentList = ({ documents }) => {
645
- const Row = ({ index, style }) => (
646
- <div style={style}>
647
- <DocumentItem document={documents[index]} />
648
- </div>
649
- )
650
-
651
- return (
652
- <List
653
- height={600}
654
- itemCount={documents.length}
655
- itemSize={80}
656
- width="100%"
657
- >
658
- {Row}
659
- </List>
660
- )
661
- }
662
- ```
663
-
664
- ### API Client Optimization
665
-
666
- #### Request Deduplication
667
- ```typescript
668
- class APIClient {
669
- private pendingRequests = new Map<string, Promise<any>>()
670
-
671
- async request(url: string, options: RequestInit = {}) {
672
- const key = `${options.method || 'GET'}:${url}:${JSON.stringify(options.body)}`
673
-
674
- if (this.pendingRequests.has(key)) {
675
- return this.pendingRequests.get(key)
676
- }
677
-
678
- const promise = fetch(url, options)
679
- .then(response => response.json())
680
- .finally(() => {
681
- this.pendingRequests.delete(key)
682
- })
683
-
684
- this.pendingRequests.set(key, promise)
685
- return promise
686
- }
687
- }
688
- ```
689
-
690
- #### Request Batching
691
- ```typescript
692
- class BatchedAPIClient {
693
- private batchQueue: Array<{
694
- query: string
695
- resolve: (result: any) => void
696
- reject: (error: any) => void
697
- }> = []
698
- private batchTimeout: NodeJS.Timeout | null = null
699
-
700
- async query(query: string): Promise<any> {
701
- return new Promise((resolve, reject) => {
702
- this.batchQueue.push({ query, resolve, reject })
703
-
704
- if (this.batchTimeout) {
705
- clearTimeout(this.batchTimeout)
706
- }
707
-
708
- this.batchTimeout = setTimeout(() => {
709
- this.processBatch()
710
- }, 50) // Batch requests for 50ms
711
- })
712
- }
713
-
714
- private async processBatch() {
715
- if (this.batchQueue.length === 0) return
716
-
717
- const batch = [...this.batchQueue]
718
- this.batchQueue = []
719
- this.batchTimeout = null
720
-
721
- try {
722
- const queries = batch.map(item => item.query)
723
- const results = await this.sendBatchRequest(queries)
724
-
725
- batch.forEach((item, index) => {
726
- item.resolve(results[index])
727
- })
728
- } catch (error) {
729
- batch.forEach(item => {
730
- item.reject(error)
731
- })
732
- }
733
- }
734
- }
735
- ```
736
-
737
- ## Vector Database Optimization
738
-
739
- ### Qdrant Performance Tuning
740
-
741
- #### Configuration Optimization
742
- ```yaml
743
- # qdrant-config.yaml
744
- service:
745
- http_port: 6333
746
- grpc_port: 6334
747
- host: 0.0.0.0
748
-
749
- storage:
750
- storage_path: /qdrant/storage
751
- snapshots_path: /qdrant/snapshots
752
-
753
- # Performance optimizations
754
- wal_capacity_mb: 32
755
- wal_segments_ahead: 0
756
-
757
- # Memory optimization
758
- memmap_threshold_kb: 65536
759
- indexing_threshold_kb: 20000
760
-
761
- cluster:
762
- enabled: false
763
-
764
- # Collection configuration for optimal performance
765
- collection_config:
766
- vectors:
767
- size: 1536 # For OpenAI embeddings
768
- distance: Cosine
769
-
770
- # Optimize for search performance
771
- hnsw_config:
772
- m: 16
773
- ef_construct: 100
774
- full_scan_threshold: 10000
775
-
776
- # Optimize for memory usage
777
- quantization_config:
778
- scalar:
779
- type: int8
780
- quantile: 0.99
781
- always_ram: true
782
- ```
783
-
784
- #### Indexing Strategy
785
- ```python
786
- from qdrant_client import QdrantClient
787
- from qdrant_client.models import Distance, VectorParams, OptimizersConfig
788
-
789
- async def create_optimized_collection(client: QdrantClient, collection_name: str):
790
- await client.create_collection(
791
- collection_name=collection_name,
792
- vectors_config=VectorParams(
793
- size=1536,
794
- distance=Distance.COSINE
795
- ),
796
- optimizers_config=OptimizersConfig(
797
- deleted_threshold=0.2,
798
- vacuum_min_vector_number=1000,
799
- default_segment_number=0,
800
- max_segment_size_kb=None,
801
- memmap_threshold_kb=None,
802
- indexing_threshold_kb=20000,
803
- flush_interval_sec=5,
804
- max_optimization_threads=1
805
- ),
806
- hnsw_config={
807
- "m": 16,
808
- "ef_construct": 100,
809
- "full_scan_threshold": 10000,
810
- "max_indexing_threads": 0,
811
- "on_disk": False
812
- }
813
- )
814
- ```
815
-
816
- #### Batch Operations
817
- ```python
818
- async def batch_upsert_vectors(
819
- client: QdrantClient,
820
- collection_name: str,
821
- vectors: List[dict],
822
- batch_size: int = 100
823
- ):
824
- """Efficiently upsert vectors in batches"""
825
- for i in range(0, len(vectors), batch_size):
826
- batch = vectors[i:i + batch_size]
827
-
828
- points = [
829
- {
830
- "id": vector["id"],
831
- "vector": vector["embedding"],
832
- "payload": vector["metadata"]
833
- }
834
- for vector in batch
835
- ]
836
-
837
- await client.upsert(
838
- collection_name=collection_name,
839
- points=points,
840
- wait=False # Don't wait for indexing
841
- )
842
-
843
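- # Snapshot the collection after all batches are submitted (NOTE: this is not a wait barrier for the wait=False upserts above; use wait=True if completion must be guaranteed)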
844
- await client.create_snapshot(collection_name)
845
- ```
846
-
847
- ### Embedding Optimization
848
-
849
- #### Caching Strategy
850
- ```python
851
- import hashlib
852
- from typing import Dict, List, Optional
853
-
854
- class EmbeddingCache:
855
- def __init__(self, max_size: int = 10000):
856
- self.cache: Dict[str, List[float]] = {}
857
- self.max_size = max_size
858
-
859
- def _get_cache_key(self, text: str) -> str:
860
- return hashlib.md5(text.encode()).hexdigest()
861
-
862
- def get(self, text: str) -> Optional[List[float]]:
863
- key = self._get_cache_key(text)
864
- return self.cache.get(key)
865
-
866
- def set(self, text: str, embedding: List[float]):
867
- if len(self.cache) >= self.max_size:
868
- # Remove oldest entry (simple FIFO)
869
- oldest_key = next(iter(self.cache))
870
- del self.cache[oldest_key]
871
-
872
- key = self._get_cache_key(text)
873
- self.cache[key] = embedding
874
-
875
- # Usage in embedding service
876
- embedding_cache = EmbeddingCache()
877
-
878
- async def get_embeddings_with_cache(texts: List[str]) -> List[List[float]]:
879
- embeddings = []
880
- texts_to_embed = []
881
- cache_indices = []
882
-
883
- # Check cache first
884
- for i, text in enumerate(texts):
885
- cached_embedding = embedding_cache.get(text)
886
- if cached_embedding:
887
- embeddings.append(cached_embedding)
888
- else:
889
- embeddings.append(None)
890
- texts_to_embed.append(text)
891
- cache_indices.append(i)
892
-
893
- # Generate embeddings for uncached texts
894
- if texts_to_embed:
895
- new_embeddings = await generate_embeddings(texts_to_embed)
896
-
897
- # Update cache and results
898
- for i, embedding in enumerate(new_embeddings):
899
- cache_index = cache_indices[i]
900
- embeddings[cache_index] = embedding
901
- embedding_cache.set(texts_to_embed[i], embedding)
902
-
903
- return embeddings
904
- ```
905
-
906
- ## LLM Service Optimization
907
-
908
- ### Google Gemini API Optimization
909
-
910
- #### Request Batching
911
- ```python
912
- import asyncio
913
- from typing import List, Dict, Any
914
-
915
- class GeminiAPIOptimizer:
916
- def __init__(self, api_key: str, max_concurrent: int = 5):
917
- self.api_key = api_key
918
- self.semaphore = asyncio.Semaphore(max_concurrent)
919
- self.request_queue = []
920
-
921
- async def generate_response_batch(
922
- self,
923
- prompts: List[str],
924
- **kwargs
925
- ) -> List[str]:
926
- """Process multiple prompts concurrently with rate limiting"""
927
-
928
- async def process_single_prompt(prompt: str) -> str:
929
- async with self.semaphore:
930
- return await self.generate_response(prompt, **kwargs)
931
-
932
- tasks = [process_single_prompt(prompt) for prompt in prompts]
933
- results = await asyncio.gather(*tasks, return_exceptions=True)
934
-
935
- # Handle exceptions
936
- processed_results = []
937
- for result in results:
938
- if isinstance(result, Exception):
939
- logger.error(f"Gemini API error: {result}")
940
- processed_results.append("Error processing request")
941
- else:
942
- processed_results.append(result)
943
-
944
- return processed_results
945
-
946
- async def generate_response(self, prompt: str, **kwargs) -> str:
947
- """Single request with retry logic"""
948
- max_retries = 3
949
- base_delay = 1
950
-
951
- for attempt in range(max_retries):
952
- try:
953
- response = await self._make_api_request(prompt, **kwargs)
954
- return response
955
- except Exception as e:
956
- if attempt == max_retries - 1:
957
- raise
958
-
959
- delay = base_delay * (2 ** attempt)
960
- await asyncio.sleep(delay)
961
-
962
- raise Exception("Max retries exceeded")
963
- ```
964
-
965
- #### Response Caching
966
- ```python
967
- class LLMResponseCache:
968
- def __init__(self, ttl: int = 3600): # 1 hour TTL
969
- self.cache = {}
970
- self.ttl = ttl
971
-
972
- def _get_cache_key(self, prompt: str, **kwargs) -> str:
973
- cache_data = {"prompt": prompt, **kwargs}
974
- return hashlib.md5(json.dumps(cache_data, sort_keys=True).encode()).hexdigest()
975
-
976
- def get(self, prompt: str, **kwargs) -> Optional[str]:
977
- key = self._get_cache_key(prompt, **kwargs)
978
- entry = self.cache.get(key)
979
-
980
- if entry and time.time() - entry["timestamp"] < self.ttl:
981
- return entry["response"]
982
-
983
- # Remove expired entry
984
- if entry:
985
- del self.cache[key]
986
-
987
- return None
988
-
989
- def set(self, prompt: str, response: str, **kwargs):
990
- key = self._get_cache_key(prompt, **kwargs)
991
- self.cache[key] = {
992
- "response": response,
993
- "timestamp": time.time()
994
- }
995
- ```
996
-
997
- ## Scaling Strategies
998
-
999
- ### Horizontal Scaling
1000
-
1001
- #### Load Balancing Configuration
1002
- ```nginx
1003
- # nginx.conf for load balancing
1004
- upstream backend_servers {
1005
- least_conn;
1006
- server backend1:8000 weight=1 max_fails=3 fail_timeout=30s;
1007
- server backend2:8000 weight=1 max_fails=3 fail_timeout=30s;
1008
- server backend3:8000 weight=1 max_fails=3 fail_timeout=30s;
1009
- }
1010
-
1011
- server {
1012
- listen 80;
1013
-
1014
- location /api/ {
1015
- proxy_pass http://backend_servers;
1016
- proxy_set_header Host $host;
1017
- proxy_set_header X-Real-IP $remote_addr;
1018
- proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
1019
-
1020
- # Failover: retry on the next upstream server for these error classes, with proxy timeouts
1021
- proxy_next_upstream error timeout invalid_header http_500 http_502 http_503;
1022
- proxy_connect_timeout 5s;
1023
- proxy_send_timeout 10s;
1024
- proxy_read_timeout 30s;
1025
- }
1026
- }
1027
- ```
1028
-
1029
- #### Database Scaling
1030
- ```python
1031
- # Read/Write splitting for PostgreSQL
1032
- from sqlalchemy import create_engine
1033
- from sqlalchemy.orm import sessionmaker
1034
-
1035
- class DatabaseManager:
1036
- def __init__(self, write_url: str, read_urls: List[str]):
1037
- self.write_engine = create_engine(write_url)
1038
- self.read_engines = [create_engine(url) for url in read_urls]
1039
- self.current_read_index = 0
1040
-
1041
- def get_write_session(self):
1042
- Session = sessionmaker(bind=self.write_engine)
1043
- return Session()
1044
-
1045
- def get_read_session(self):
1046
- # Round-robin read replicas
1047
- engine = self.read_engines[self.current_read_index]
1048
- self.current_read_index = (self.current_read_index + 1) % len(self.read_engines)
1049
-
1050
- Session = sessionmaker(bind=engine)
1051
- return Session()
1052
- ```
1053
-
1054
- ### Vertical Scaling
1055
-
1056
- #### Resource Allocation Guidelines
1057
- ```yaml
1058
- # Kubernetes resource allocation
1059
- apiVersion: apps/v1
1060
- kind: Deployment
1061
- metadata:
1062
- name: knowledge-assistant-backend
1063
- spec:
1064
- replicas: 3
1065
- template:
1066
- spec:
1067
- containers:
1068
- - name: backend
1069
- image: knowledge-assistant-backend:latest
1070
- resources:
1071
- requests:
1072
- memory: "256Mi"
1073
- cpu: "250m"
1074
- limits:
1075
- memory: "512Mi"
1076
- cpu: "500m"
1077
- env:
1078
- - name: WORKERS
1079
- value: "2" # 2 workers per container
1080
- - name: MAX_CONNECTIONS
1081
- value: "100"
1082
- ```
1083
-
1084
- ### Auto-Scaling Configuration
1085
-
1086
- #### Platform-Specific Auto-Scaling
1087
-
1088
- **Google Cloud Run:**
1089
- ```yaml
1090
- apiVersion: serving.knative.dev/v1
1091
- kind: Service
1092
- metadata:
1093
- name: knowledge-assistant-backend
1094
- annotations:
1095
- run.googleapis.com/execution-environment: gen2
1096
- spec:
1097
- template:
1098
- metadata:
1099
- annotations:
1100
- autoscaling.knative.dev/minScale: "0"
1101
- autoscaling.knative.dev/maxScale: "100"
1102
- run.googleapis.com/cpu-throttling: "false"
1103
- spec:
1104
- containerConcurrency: 80
1105
- timeoutSeconds: 300
1106
- containers:
1107
- - image: gcr.io/project/knowledge-assistant-backend
1108
- resources:
1109
- limits:
1110
- cpu: "1000m"
1111
- memory: "1Gi"
1112
- ```
1113
-
1114
- **Fly.io Auto-Scaling:**
1115
- ```toml
1116
- # fly.toml
1117
- [http_service]
1118
- internal_port = 8000
1119
- force_https = true
1120
- auto_stop_machines = true
1121
- auto_start_machines = true
1122
- min_machines_running = 0
1123
- processes = ["app"]
1124
-
1125
- [[http_service.checks]]
1126
- grace_period = "10s"
1127
- interval = "30s"
1128
- method = "GET"
1129
- timeout = "5s"
1130
- path = "/health"
1131
-
1132
- [metrics]
1133
- port = 9091
1134
- path = "/metrics"
1135
- ```
1136
-
1137
- ## Platform-Specific Optimizations
1138
-
1139
- ### Railway Optimizations
1140
-
1141
- #### Memory Management
1142
- ```python
1143
- # Optimize for Railway's 512MB limit
1144
- import gc
1145
- import psutil
1146
-
1147
- class MemoryManager:
1148
- def __init__(self, threshold_percent: float = 80):
1149
- self.threshold_percent = threshold_percent
1150
-
1151
- def check_memory_usage(self):
1152
- memory_percent = psutil.virtual_memory().percent
1153
- if memory_percent > self.threshold_percent:
1154
- self.cleanup_memory()
1155
-
1156
- def cleanup_memory(self):
1157
- # Clear caches
1158
- if hasattr(self, 'query_cache'):
1159
- self.query_cache.clear()
1160
- if hasattr(self, 'embedding_cache'):
1161
- self.embedding_cache.clear()
1162
-
1163
- # Force garbage collection
1164
- gc.collect()
1165
-
1166
- logger.info(f"Memory cleanup completed. Usage: {psutil.virtual_memory().percent}%")
1167
-
1168
- # Use in API endpoints
1169
- memory_manager = MemoryManager()
1170
-
1171
- @app.middleware("http")
1172
- async def memory_check_middleware(request: Request, call_next):
1173
- memory_manager.check_memory_usage()
1174
- response = await call_next(request)
1175
- return response
1176
- ```
1177
-
1178
- ### Fly.io Optimizations
1179
-
1180
- #### Multi-Region Deployment
1181
- ```bash
1182
- # Deploy to multiple regions
1183
- flyctl regions add lax sea fra
1184
-
1185
- # Check current regions
1186
- flyctl regions list
1187
-
1188
- # Configure region-specific scaling
1189
- flyctl scale count 2 --region ord
1190
- flyctl scale count 1 --region lax
1191
- flyctl scale count 1 --region sea
1192
- ```
1193
-
1194
- ### Google Cloud Run Optimizations
1195
-
1196
- #### Cold Start Optimization
1197
- ```python
1198
- # Minimize cold start time
1199
- import asyncio
1200
- from contextlib import asynccontextmanager
1201
-
1202
- # Pre-initialize services
1203
- @asynccontextmanager
1204
- async def lifespan(app: FastAPI):
1205
- # Startup
1206
- await initialize_database()
1207
- await initialize_qdrant_client()
1208
- await warm_up_gemini_api()
1209
-
1210
- yield
1211
-
1212
- # Shutdown
1213
- await cleanup_resources()
1214
-
1215
- app = FastAPI(lifespan=lifespan)
1216
-
1217
- async def warm_up_gemini_api():
1218
- """Warm up Gemini API with a simple request"""
1219
- try:
1220
- await generate_response("Hello", max_tokens=1)
1221
- except Exception:
1222
- pass # Ignore warm-up failures
1223
- ```
1224
-
1225
- ## Cost Optimization
1226
-
1227
- ### Resource Usage Monitoring
1228
-
1229
- #### Cost Tracking Script
1230
- ```bash
1231
- #!/bin/bash
1232
- # cost-monitor.sh
1233
-
1234
- echo "📊 Resource Usage Report - $(date)"
1235
- echo "=================================="
1236
-
1237
- # Memory usage
1238
- echo "💾 Memory Usage:"
1239
- free -h | grep -E "(Mem|Swap)"
1240
-
1241
- # Disk usage
1242
- echo -e "\n💽 Disk Usage:"
1243
- df -h | grep -E "(Filesystem|/dev/)"
1244
-
1245
- # Docker resource usage
1246
- echo -e "\n🐳 Container Resource Usage:"
1247
- docker stats --no-stream --format "table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.NetIO}}\t{{.BlockIO}}"
1248
-
1249
- # Database size
1250
- echo -e "\n🗄️ Database Size:"
1251
- if [ -f "data/knowledge_assistant.db" ]; then
1252
- du -sh data/knowledge_assistant.db
1253
- fi
1254
-
1255
- # Log file sizes
1256
- echo -e "\n📝 Log File Sizes:"
1257
- find logs/ -name "*.log" -exec du -sh {} \; 2>/dev/null | sort -hr
1258
-
1259
- echo -e "\n✅ Report complete"
1260
- ```
1261
-
1262
- ### Cost-Effective Architecture Patterns
1263
-
1264
- #### Serverless-First Approach
1265
- ```python
1266
- # Design for serverless with minimal cold start
1267
- class ServerlessOptimizedApp:
1268
- def __init__(self):
1269
- self.db_connection = None
1270
- self.qdrant_client = None
1271
- self.llm_client = None
1272
-
1273
- async def get_db_connection(self):
1274
- if not self.db_connection:
1275
- self.db_connection = await create_database_connection()
1276
- return self.db_connection
1277
-
1278
- async def get_qdrant_client(self):
1279
- if not self.qdrant_client:
1280
- self.qdrant_client = await create_qdrant_client()
1281
- return self.qdrant_client
1282
-
1283
- async def process_request(self, request):
1284
- # Lazy initialization
1285
- db = await self.get_db_connection()
1286
- qdrant = await self.get_qdrant_client()
1287
-
1288
- # Process request
1289
- return await handle_request(request, db, qdrant)
1290
-
1291
- # Global instance for serverless
1292
- app_instance = ServerlessOptimizedApp()
1293
- ```
1294
-
1295
- This comprehensive performance optimization guide provides strategies for maximizing the efficiency and scalability of the Knowledge Assistant RAG application across all deployment platforms while maintaining cost-effectiveness.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
SUMMARY.md DELETED
@@ -1,129 +0,0 @@
1
- # Project Summary: Phases 1 & 2
2
-
3
- This document summarizes the work completed in the first two phases of the RAG Knowledge Assistant project.
4
-
5
- ---
6
-
7
- ## Phase 1: Research & Setup
8
-
9
- Phase 1 focused on establishing a fully containerized and automated local development environment.
10
-
11
- ### Key Achievements:
12
-
13
- 1. **Project Structure:**
14
- - `src/`: Contains all the Python source code for the backend API.
15
- - `uploads/`: A directory for temporarily storing uploaded files during processing.
16
- - `scripts/`: Holds utility scripts, such as the automated model puller for Ollama.
17
-
18
- 2. **Dependency Management:**
19
- - A `requirements.txt` file was created to manage all Python dependencies, including FastAPI, LangChain, Qdrant, and Sentence-Transformers.
20
-
21
- 3. **Containerization with Docker:**
22
- - A `Dockerfile` was written to create a container image for our FastAPI application.
23
- - A `docker-compose.yml` file orchestrates all the necessary services:
24
- - `backend`: Our FastAPI application.
25
- - `qdrant`: The vector database for storing document embeddings.
26
- - `ollama`: The service for running the open-source LLM.
27
-
28
- 4. **Automated Model Pulling:**
29
- - An entrypoint script (`scripts/ollama_entrypoint.sh`) was created to automatically pull the `llama3` model when the Ollama container starts. This ensures the LLM is ready without manual intervention.
30
-
31
- ---
32
-
33
- ## Phase 2: Backend API MVP
34
-
35
- Phase 2 focused on building the core functionality of the knowledge assistant, resulting in a functional RAG pipeline accessible via a REST API.
36
-
37
- ### Key Achievements:
38
-
39
- 1. **Modular Codebase:**
40
- - The `src/core/` directory was created to organize the application's business logic into separate, manageable modules:
41
- - `processing.py`: Handles PDF parsing, text chunking, and embedding model loading.
42
- - `vector_store.py`: Manages all interactions with the Qdrant database (creation, upserting, searching).
43
- - `llm.py`: Handles all interactions with the Ollama LLM service (prompt formatting, response generation).
44
- - `models.py`: Defines the Pydantic models for API request and response data structures.
45
-
46
- 2. **API Endpoints Implemented:**
47
- - **`GET /health`**: A simple endpoint to confirm that the API is running.
48
- - **`POST /upload`**: Implements the full document ingestion pipeline:
49
- 1. Receives and validates a PDF file.
50
- 2. Extracts text using `PyMuPDF`.
51
- 3. Splits the text into smaller, overlapping chunks using `LangChain`.
52
- 4. Generates vector embeddings for each chunk using `sentence-transformers`.
53
- 5. Upserts the chunks and their embeddings into the Qdrant database.
54
- - **`POST /query`**: Implements the complete RAG pipeline to answer questions:
55
- 1. Receives a JSON object with a `query` string.
56
- 2. Generates an embedding for the query.
57
- 3. Searches Qdrant to retrieve the most relevant document chunks (Retrieval).
58
- 4. Constructs a detailed prompt containing the user's query and the retrieved context.
59
- 5. Sends the prompt to the `llama3` model via Ollama to get an answer (Augmented Generation).
60
- 6. Returns the generated answer along with the source documents used for context.
61
-
62
- ---
63
-
64
- ## Development Log (Continuous)
65
-
66
- This section tracks the detailed implementation steps and troubleshooting throughout the project.
67
-
68
- 1. **Initial Scaffolding**: Created `requirements.txt` and a basic FastAPI app in `src/main.py`.
69
- 2. **Containerization**: Wrote a `Dockerfile` for the backend and a `docker-compose.yml` to orchestrate the `backend`, `qdrant`, and `ollama` services.
70
- 3. **Code Modularization**: Refactored the application logic into a `src/core` directory with distinct modules for `processing.py`, `vector_store.py`, `llm.py`, and `models.py`.
71
- 4. **Ingestion Pipeline (`/upload`)**: Implemented the full document ingestion flow: PDF parsing -> Text Chunking -> Embedding -> Storage in Qdrant.
72
- 5. **RAG Pipeline (`/query`)**: Implemented the query flow: Query Embedding -> Vector Search -> Prompt Formatting -> LLM Generation -> Response with Sources.
73
- 6. **Automation & Troubleshooting**:
74
- - **Automated Model Pulling**: Created `scripts/ollama_entrypoint.sh` to automatically check for the server and pull the `llama3` model on startup, removing a manual setup step.
75
- - **Fixed `curl` Dependency**: Added `curl` installation to the Ollama entrypoint script to resolve a `command not found` error.
76
- - **Fixed Service Race Condition**: Created `scripts/wait-for-qdrant.sh` and updated the backend's entrypoint in `docker-compose.yml` to ensure the backend waits for Qdrant to be healthy before starting. This fixed a `timed out` connection error.
77
-
78
- ---
79
-
80
- ## How to Test the Backend MVP
81
-
82
- You can interact with the API using `curl` in your terminal.
83
-
84
- **Step 1: Start the Services**
85
-
86
- Open your terminal in the project's root directory and run:
87
-
88
- ```bash
89
- docker-compose up --build
90
- ```
91
-
92
- This will build the images and start all three services. The first time you run this, it will take a few minutes to download the `llama3` model. You can monitor the logs to see the progress.
93
-
94
- **Step 2: Test the `/upload` Endpoint**
95
-
96
- Once the services are running, use the following `curl` command to upload a PDF file. Replace `"/path/to/your/document.pdf"` with the actual path to your file.
97
-
98
- ```bash
99
- curl -X POST -F "file=@/path/to/your/document.pdf" http://localhost:8000/upload
100
- ```
101
-
102
- * **Expected Response**: A JSON object confirming the upload was successful.
103
- ```json
104
- {"filename":"document.pdf","message":"Successfully uploaded, processed, and stored.","num_chunks_stored":25}
105
- ```
106
-
107
- **Step 3: Test the `/query` Endpoint**
108
-
109
- After uploading a document, you can ask questions about its content. Replace `"Your question about the document"` with your query.
110
-
111
- ```bash
112
- curl -X POST -H "Content-Type: application/json" \
113
- -d '{"query": "Your question about the document"}' \
114
- http://localhost:8000/query
115
- ```
116
-
117
- * **Expected Response**: A JSON object containing the LLM's answer and the source document chunks used to generate it.
118
- ```json
119
- {
120
- "answer": "This is the answer generated by the LLM based on the document.",
121
- "source_documents": [
122
- {
123
- "source": "document.pdf",
124
- "text": "A relevant chunk of text from the source document...",
125
- "score": 0.91
126
- }
127
- ]
128
- }
129
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
TROUBLESHOOTING.md DELETED
@@ -1,894 +0,0 @@
1
- # Troubleshooting and Maintenance Guide
2
-
3
- This comprehensive guide covers common deployment issues, solutions, and maintenance procedures for the Knowledge Assistant RAG application across all supported platforms.
4
-
5
- ## Table of Contents
6
-
7
- 1. [Common Deployment Issues](#common-deployment-issues)
8
- 2. [Platform-Specific Issues](#platform-specific-issues)
9
- 3. [Environment Variables and Secrets](#environment-variables-and-secrets)
10
- 4. [Performance Optimization](#performance-optimization)
11
- 5. [Database Issues](#database-issues)
12
- 6. [Service Communication Problems](#service-communication-problems)
13
- 7. [Monitoring and Logging](#monitoring-and-logging)
14
- 8. [Maintenance Procedures](#maintenance-procedures)
15
- 9. [Emergency Recovery](#emergency-recovery)
16
-
17
- ## Common Deployment Issues
18
-
19
- ### 1. Container Build Failures
20
-
21
- #### Symptoms
22
- - Build process fails during Docker image creation
23
- - "No space left on device" errors
24
- - Dependency installation failures
25
-
26
- #### Solutions
27
-
28
- **Memory/Disk Space Issues:**
29
- ```bash
30
- # Clean up Docker system
31
- docker system prune -a
32
-
33
- # Remove unused images
34
- docker image prune -a
35
-
36
- # Check disk space
37
- df -h
38
- ```
39
-
40
- **Dependency Issues:**
41
- ```bash
42
- # Clear package manager cache
43
- npm cache clean --force
44
- pip cache purge
45
-
46
- # Update package lists
47
- apt-get update # For Debian/Ubuntu
48
- apk update # For Alpine
49
- ```
50
-
51
- **Build Context Optimization (.dockerignore):**
52
- ```bash
53
- # Use .dockerignore to exclude unnecessary files
54
- echo "node_modules" >> .dockerignore
55
- echo ".git" >> .dockerignore
56
- echo "*.md" >> .dockerignore
57
- echo "tests/" >> .dockerignore
58
- ```
59
-
60
- ### 2. Memory Limit Exceeded
61
-
62
- #### Symptoms
63
- - Services crash with OOM (Out of Memory) errors
64
- - Slow performance or timeouts
65
- - Platform-specific memory limit warnings
66
-
67
- #### Solutions
68
-
69
- **Immediate Fixes:**
70
- ```bash
71
- # Check memory usage
72
- docker stats
73
- htop
74
- free -h
75
-
76
- # Restart services to clear memory
77
- docker-compose restart
78
- ```
79
-
80
- **Long-term Optimization:**
81
- ```bash
82
- # Use Alpine Linux base images
83
- # In the Dockerfile, use: FROM python:3.11-alpine instead of FROM python:3.11
84
-
85
- # Remove development dependencies
86
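- pip install --no-cache-dir -r requirements.txt  # pip has no --no-dev flag; keep dev-only packages in a separate requirements file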
87
- npm ci --only=production
88
-
89
- # Use external services
90
- # Replace Ollama with Google Gemini API
91
- # Use Qdrant Cloud instead of self-hosted
92
- ```
93
-
94
- ### 3. Service Startup Failures
95
-
96
- #### Symptoms
97
- - Services fail to start or immediately crash
98
- - Health checks fail
99
- - Connection refused errors
100
-
101
- #### Diagnostic Steps
102
- ```bash
103
- # Check service logs
104
- docker-compose logs service-name
105
- kubectl logs pod-name # For Kubernetes
106
- flyctl logs # For Fly.io
107
-
108
- # Check service status
109
- docker-compose ps
110
- systemctl status service-name
111
-
112
- # Test service connectivity
113
- curl -f http://localhost:8000/health
114
- telnet localhost 6333 # For Qdrant
115
- ```
116
-
117
- #### Common Solutions
118
- ```bash
119
- # Check environment variables
120
- env | grep -E "(DATABASE|QDRANT|JWT)"
121
-
122
- # Verify file permissions
123
- chmod +x scripts/*.sh
124
- chown -R app:app /app/data
125
-
126
- # Check port conflicts
127
- netstat -tulpn | grep :8000
128
- lsof -i :8000
129
- ```
130
-
131
- ## Platform-Specific Issues
132
-
133
- ### Railway Deployment Issues
134
-
135
- #### Issue: Service Won't Start
136
- ```bash
137
- # Check Railway logs
138
- railway logs
139
-
140
- # Common fixes:
141
- railway variables set PORT=8000
142
- railway variables set DATABASE_URL=sqlite+aiosqlite:///./data/knowledge_assistant.db
143
-
144
- # Restart service
145
- railway service restart
146
- ```
147
-
148
- #### Issue: Memory Limit (512MB) Exceeded
149
- ```bash
150
- # Monitor memory usage
151
- railway metrics
152
-
153
- # Solutions:
154
- # 1. Use external services
155
- railway variables set QDRANT_CLOUD_URL=https://your-cluster.qdrant.io
156
- railway variables set GEMINI_API_KEY=your-api-key
157
-
158
- # 2. Optimize container
159
- # Use multi-stage builds and Alpine images
160
- ```
161
-
162
- ### Fly.io Deployment Issues
163
-
164
- #### Issue: Volume Mount Problems
165
- ```bash
166
- # Check volumes
167
- flyctl volumes list
168
-
169
- # Create missing volume
170
- flyctl volumes create knowledge_data --size 1
171
-
172
- # Verify mount in fly.toml
173
- [mounts]
174
- source = "knowledge_data"
175
- destination = "/app/data"
176
- ```
177
-
178
- #### Issue: Machine Won't Start
179
- ```bash
180
- # Check machine status
181
- flyctl machine list
182
-
183
- # View detailed logs
184
- flyctl logs --app your-app-name
185
-
186
- # Restart machine
187
- flyctl machine restart MACHINE_ID
188
- ```
189
-
190
- ### Google Cloud Run Issues
191
-
192
- #### Issue: Cold Start Timeouts
193
- ```bash
194
- # Check service configuration
195
- gcloud run services describe SERVICE_NAME --region=us-central1
196
-
197
- # Increase timeout and memory
198
- gcloud run services update SERVICE_NAME \
199
- --region=us-central1 \
200
- --timeout=300 \
201
- --memory=1Gi \
202
- --cpu=1000m
203
- ```
204
-
205
- #### Issue: Cloud SQL Connection Problems
206
- ```bash
207
- # Test Cloud SQL connection
208
- gcloud sql connect INSTANCE_NAME --user=USERNAME
209
-
210
- # Check service account permissions
211
- gcloud projects get-iam-policy PROJECT_ID
212
-
213
- # Update connection string
214
- gcloud run services update SERVICE_NAME \
215
- --region=us-central1 \
216
- --set-env-vars="DATABASE_URL=postgresql://user:pass@/db?host=/cloudsql/project:region:instance"
217
- ```
218
-
219
- ### Vercel Deployment Issues
220
-
221
- #### Issue: Serverless Function Timeouts
222
- ```bash
223
- # Check function logs in Vercel dashboard
224
- # Or use Vercel CLI
225
- vercel logs
226
-
227
- # Optimize function performance:
228
- # 1. Reduce cold start time
229
- # 2. Use edge functions for simple operations
230
- # 3. Implement proper caching
231
- ```
232
-
233
- #### Issue: Build Size Limits
234
- ```bash
235
- # Check build output size
236
- du -sh .vercel/output
237
-
238
- # Optimize bundle size:
239
- npm run build -- --analyze
240
- # Remove unused dependencies
241
- npm prune --production
242
- ```
243
-
244
- ## Environment Variables and Secrets
245
-
246
- ### Required Environment Variables
247
-
248
- #### Core Application Variables
249
- ```bash
250
- # Authentication
251
- JWT_SECRET=your-32-character-minimum-secret-key
252
- USER_REGISTRATION_ENABLED=true
253
-
254
- # Database
255
- DATABASE_URL=sqlite+aiosqlite:///./data/knowledge_assistant.db
256
- # Or for PostgreSQL:
257
- DATABASE_URL=postgresql://user:password@host:port/database
258
-
259
- # Vector Database
260
- QDRANT_HOST=localhost
261
- QDRANT_PORT=6333
262
- # Or for Qdrant Cloud:
263
- QDRANT_CLOUD_URL=https://your-cluster.qdrant.io
264
- QDRANT_API_KEY=your-qdrant-api-key
265
-
266
- # LLM Service
267
- GEMINI_API_KEY=your-google-gemini-api-key
268
-
269
- # CORS Configuration
270
- CORS_ORIGINS=https://your-frontend-domain.com,http://localhost:3000
271
-
272
- # Frontend Configuration
273
- VITE_API_BASE_URL=https://your-backend-domain.com
274
- VITE_ENABLE_REGISTRATION=true
275
- VITE_API_TIMEOUT=30000
276
- ```
277
-
278
- ### Secrets Management by Platform
279
-
280
- #### Railway
281
- ```bash
282
- # Set secrets via CLI
283
- railway variables set JWT_SECRET=your-secret
284
- railway variables set GEMINI_API_KEY=your-key
285
-
286
- # Or via web dashboard
287
- # Visit railway.app -> Your Project -> Variables
288
- ```
289
-
290
- #### Fly.io
291
- ```bash
292
- # Set secrets via CLI
293
- flyctl secrets set JWT_SECRET=your-secret
294
- flyctl secrets set GEMINI_API_KEY=your-key
295
-
296
- # List current secrets
297
- flyctl secrets list
298
- ```
299
-
300
- #### Google Cloud Run
301
- ```bash
302
- # Create secrets in Secret Manager
303
- gcloud secrets create jwt-secret --data-file=jwt-secret.txt
304
- gcloud secrets create gemini-api-key --data-file=gemini-key.txt
305
-
306
- # Grant access to service account
307
- gcloud secrets add-iam-policy-binding jwt-secret \
308
- --member="serviceAccount:SERVICE_ACCOUNT@PROJECT.iam.gserviceaccount.com" \
309
- --role="roles/secretmanager.secretAccessor"
310
- ```
311
-
312
- #### Vercel
313
- ```bash
314
- # Set environment variables via CLI
315
- vercel env add JWT_SECRET
316
- vercel env add GEMINI_API_KEY
317
-
318
- # Or via web dashboard
319
- # Visit vercel.com -> Your Project -> Settings -> Environment Variables
320
- ```
321
-
322
- ### Environment Variable Validation
323
-
324
- Create a validation script:
325
- ```bash
326
- #!/bin/bash
327
- # validate-env.sh
328
-
329
- required_vars=(
330
- "JWT_SECRET"
331
- "GEMINI_API_KEY"
332
- "DATABASE_URL"
333
- )
334
-
335
- for var in "${required_vars[@]}"; do
336
- if [[ -z "${!var}" ]]; then
337
- echo "ERROR: $var is not set"
338
- exit 1
339
- fi
340
- done
341
-
342
- # Validate JWT secret length
343
- if [[ ${#JWT_SECRET} -lt 32 ]]; then
344
- echo "ERROR: JWT_SECRET must be at least 32 characters"
345
- exit 1
346
- fi
347
-
348
- echo "All environment variables are valid"
349
- ```
350
-
351
- ## Performance Optimization
352
-
353
- ### Container Optimization
354
-
355
- #### Multi-stage Dockerfile Example
356
- ```dockerfile
357
- # Build stage
358
- FROM node:18-alpine AS frontend-builder
359
- WORKDIR /app
360
- COPY package*.json ./
361
- RUN npm ci
362
- COPY . .
363
- RUN npm run build
364
-
365
- # Production stage
366
- FROM nginx:alpine
367
- COPY --from=frontend-builder /app/dist /usr/share/nginx/html
368
- COPY nginx.conf /etc/nginx/nginx.conf
369
- EXPOSE 80
370
- CMD ["nginx", "-g", "daemon off;"]
371
- ```
372
-
373
- #### Image Size Optimization
374
- ```bash
375
- # Before optimization
376
- docker images | grep knowledge-assistant
377
- # knowledge-assistant-backend latest 7.84GB
378
-
379
- # After optimization techniques:
380
- # 1. Multi-stage builds
381
- # 2. Alpine base images
382
- # 3. Dependency pruning
383
- # 4. Layer optimization
384
-
385
- # After optimization
386
- docker images | grep knowledge-assistant
387
- # knowledge-assistant-backend latest 156MB
388
- ```
389
-
390
- ### Database Performance
391
-
392
- #### SQLite Optimization
393
- ```python
394
- # In your database configuration
395
- DATABASE_CONFIG = {
396
- "pool_pre_ping": True,
397
- "pool_recycle": 300,
398
- "connect_args": {
399
- "check_same_thread": False,
400
- "timeout": 20,
401
- "isolation_level": None,
402
- }
403
- }
404
- ```
405
-
406
- #### PostgreSQL Optimization
407
- ```python
408
- # Connection pooling
409
- DATABASE_CONFIG = {
410
- "pool_size": 5,
411
- "max_overflow": 10,
412
- "pool_pre_ping": True,
413
- "pool_recycle": 3600,
414
- }
415
- ```
416
-
417
- ### API Performance
418
-
419
- #### Caching Implementation
420
- ```python
421
- from functools import lru_cache
422
- import redis
423
-
424
- # In-memory caching
425
- @lru_cache(maxsize=128)
426
- def get_cached_embeddings(text):
427
- return generate_embeddings(text)
428
-
429
- # Redis caching (if available)
430
- redis_client = redis.Redis(host='localhost', port=6379, db=0)
431
-
432
- def cache_query_result(query_hash, result):
433
- redis_client.setex(query_hash, 3600, json.dumps(result))
434
- ```
435
-
436
- ### Scaling Guidelines
437
-
438
- #### Horizontal Scaling
439
- ```yaml
440
- # For Kubernetes
441
- apiVersion: apps/v1
442
- kind: Deployment
443
- metadata:
444
- name: knowledge-assistant-backend
445
- spec:
446
- replicas: 3
447
- selector:
448
- matchLabels:
449
- app: knowledge-assistant-backend
450
- template:
451
- spec:
452
- containers:
453
- - name: backend
454
- image: knowledge-assistant-backend:latest
455
- resources:
456
- requests:
457
- memory: "256Mi"
458
- cpu: "250m"
459
- limits:
460
- memory: "512Mi"
461
- cpu: "500m"
462
- ```
463
-
464
- #### Vertical Scaling
465
- ```bash
466
- # Railway
467
- railway service scale --memory 1024
468
-
469
- # Fly.io
470
- flyctl scale memory 512
471
-
472
- # Google Cloud Run
473
- gcloud run services update SERVICE_NAME \
474
- --memory=1Gi \
475
- --cpu=1000m
476
- ```
477
-
478
- ## Database Issues
479
-
480
- ### SQLite Issues
481
-
482
- #### Database Locked Errors
483
- ```bash
484
- # Check for zombie processes
485
- ps aux | grep python
486
- kill -9 PID
487
-
488
- # Check file permissions
489
- ls -la data/knowledge_assistant.db
490
- chmod 664 data/knowledge_assistant.db
491
-
492
- # Backup and restore database
493
- sqlite3 data/knowledge_assistant.db ".backup backup.db"
494
- mv backup.db data/knowledge_assistant.db
495
- ```
496
-
497
- #### Corruption Recovery
498
- ```bash
499
- # Check database integrity
500
- sqlite3 data/knowledge_assistant.db "PRAGMA integrity_check;"
501
-
502
- # Repair database
503
- sqlite3 data/knowledge_assistant.db ".recover" | sqlite3 repaired.db
504
- mv repaired.db data/knowledge_assistant.db
505
- ```
506
-
507
- ### PostgreSQL Issues
508
-
509
- #### Connection Pool Exhaustion
510
- ```python
511
- # Monitor connection pool
512
- from sqlalchemy import event
513
- from sqlalchemy.pool import Pool
514
-
515
- @event.listens_for(Pool, "connect")
516
- def set_sqlite_pragma(dbapi_connection, connection_record):
517
- print(f"New connection: {dbapi_connection}")
518
-
519
- @event.listens_for(Pool, "checkout")
520
- def receive_checkout(dbapi_connection, connection_record, connection_proxy):
521
- print(f"Connection checked out: {dbapi_connection}")
522
- ```
523
-
524
- #### Migration Issues
525
- ```bash
526
- # Check migration status
527
- alembic current
528
- alembic history
529
-
530
- # Reset migrations (DANGEROUS - backup first!)
531
- alembic stamp head
532
- alembic revision --autogenerate -m "Reset migrations"
533
- alembic upgrade head
534
- ```
535
-
536
- ## Service Communication Problems
537
-
538
- ### Internal Service Discovery
539
-
540
- #### Docker Compose
541
- ```yaml
542
- # Ensure services can communicate
543
- version: '3.8'
544
- services:
545
- backend:
546
- environment:
547
- - QDRANT_HOST=qdrant
548
- - QDRANT_PORT=6333
549
- qdrant:
550
- hostname: qdrant
551
- ```
552
-
553
- #### Kubernetes
554
- ```yaml
555
- # Service definition
556
- apiVersion: v1
557
- kind: Service
558
- metadata:
559
- name: qdrant-service
560
- spec:
561
- selector:
562
- app: qdrant
563
- ports:
564
- - port: 6333
565
- targetPort: 6333
566
- ```
567
-
568
- ### Network Debugging
569
-
570
- #### Test Service Connectivity
571
- ```bash
572
- # From within container
573
- curl -f http://qdrant:6333/health
574
- telnet qdrant 6333
575
- nslookup qdrant
576
-
577
- # Check DNS resolution
578
- dig qdrant.default.svc.cluster.local # Kubernetes
579
- nslookup qdrant-service.railway.internal # Railway
580
- ```
581
-
582
- #### Port Conflicts
583
- ```bash
584
- # Check port usage
585
- netstat -tulpn | grep :6333
586
- lsof -i :6333
587
-
588
- # Kill conflicting processes
589
- sudo kill -9 $(lsof -t -i:6333)
590
- ```
591
-
592
- ## Monitoring and Logging
593
-
594
- ### Health Check Implementation
595
-
596
- #### Backend Health Endpoint
597
- ```python
598
- from fastapi import FastAPI, HTTPException
599
- import asyncio
600
-
601
- app = FastAPI()
602
-
603
- @app.get("/health")
604
- async def health_check():
605
- checks = {
606
- "database": await check_database(),
607
- "qdrant": await check_qdrant(),
608
- "gemini": await check_gemini_api(),
609
- }
610
-
611
- if all(checks.values()):
612
- return {"status": "healthy", "checks": checks}
613
- else:
614
- raise HTTPException(status_code=503, detail={"status": "unhealthy", "checks": checks})
615
-
616
- async def check_database():
617
- try:
618
- # Test database connection
619
- return True
620
- except Exception:
621
- return False
622
- ```
623
-
624
- #### Monitoring Script
625
- ```bash
626
- #!/bin/bash
627
- # monitor-services.sh
628
-
629
- services=("frontend:3000" "backend:8000" "qdrant:6333")
630
-
631
- for service in "${services[@]}"; do
632
- name=${service%:*}
633
- port=${service#*:}
634
-
635
- if curl -f -s "http://localhost:$port/health" > /dev/null; then
636
- echo "✅ $name is healthy"
637
- else
638
- echo "❌ $name is unhealthy"
639
- # Send alert or restart service
640
- fi
641
- done
642
- ```
643
-
644
- ### Log Aggregation
645
-
646
- #### Centralized Logging
647
- ```yaml
648
- # Docker Compose with logging
649
- version: '3.8'
650
- services:
651
- backend:
652
- logging:
653
- driver: "json-file"
654
- options:
655
- max-size: "10m"
656
- max-file: "3"
657
- ```
658
-
659
- #### Log Analysis
660
- ```bash
661
- # Search for errors
662
- grep -i error logs/*.log
663
- grep -E "(500|error|exception)" logs/backend.log
664
-
665
- # Monitor real-time logs
666
- tail -f logs/backend.log | grep -i error
667
- ```
668
-
669
- ## Maintenance Procedures
670
-
671
- ### Regular Maintenance Tasks
672
-
673
- #### Daily Tasks
674
- ```bash
675
- #!/bin/bash
676
- # daily-maintenance.sh
677
-
678
- # Check service health
679
- ./scripts/health-check.sh
680
-
681
- # Backup database
682
- ./scripts/backup-database.sh
683
-
684
- # Clean up logs
685
- find logs/ -name "*.log" -mtime +7 -delete
686
-
687
- # Check disk space
688
- df -h | awk 'NR > 1 && $5+0 > 80 {print "WARNING: " $0}'
689
- ```
690
-
691
- #### Weekly Tasks
692
- ```bash
693
- #!/bin/bash
694
- # weekly-maintenance.sh
695
-
696
- # Update dependencies (in development)
697
- npm audit fix
698
- pip list --outdated
699
-
700
- # Clean up Docker
701
- docker system prune -f
702
-
703
- # Rotate logs
704
- logrotate /etc/logrotate.d/knowledge-assistant
705
- ```
706
-
707
- #### Monthly Tasks
708
- ```bash
709
- #!/bin/bash
710
- # monthly-maintenance.sh
711
-
712
- # Security updates
713
- apt update && apt upgrade -y # Ubuntu/Debian
714
- apk update && apk upgrade # Alpine
715
-
716
- # Performance analysis
717
- ./scripts/performance-report.sh
718
-
719
- # Backup verification
720
- ./scripts/verify-backups.sh
721
- ```
722
-
723
- ### Database Maintenance
724
-
725
- #### SQLite Maintenance
726
- ```bash
727
- # Vacuum database to reclaim space
728
- sqlite3 data/knowledge_assistant.db "VACUUM;"
729
-
730
- # Analyze query performance
731
- sqlite3 data/knowledge_assistant.db "ANALYZE;"
732
-
733
- # Check database size
734
- du -sh data/knowledge_assistant.db
735
- ```
736
-
737
- #### PostgreSQL Maintenance
738
- ```sql
739
- -- Vacuum and analyze
740
- VACUUM ANALYZE;
741
-
742
- -- Check database size
743
- SELECT pg_size_pretty(pg_database_size('knowledge_assistant'));
744
-
745
- -- Check table sizes
746
- SELECT
747
- schemaname,
748
- tablename,
749
- pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
750
- FROM pg_tables
751
- WHERE schemaname = 'public'
752
- ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
753
- ```
754
-
755
- ## Emergency Recovery
756
-
757
- ### Service Recovery Procedures
758
-
759
- #### Complete Service Failure
760
- ```bash
761
- # 1. Check system resources
762
- free -h
763
- df -h
764
- ps aux | head -20
765
-
766
- # 2. Restart all services
767
- docker-compose down
768
- docker-compose up -d
769
-
770
- # 3. Check logs for errors
771
- docker-compose logs --tail=100
772
-
773
- # 4. Verify health
774
- curl -f http://localhost:8000/health
775
- ```
776
-
777
- #### Database Recovery
778
- ```bash
779
- # 1. Stop application
780
- docker-compose stop backend
781
-
782
- # 2. Backup current database
783
- cp data/knowledge_assistant.db data/knowledge_assistant.db.backup
784
-
785
- # 3. Restore from backup
786
- cp backups/latest-backup.db data/knowledge_assistant.db
787
-
788
- # 4. Start application
789
- docker-compose start backend
790
-
791
- # 5. Verify functionality
792
- curl -f http://localhost:8000/health
793
- ```
794
-
795
- ### Rollback Procedures
796
-
797
- #### Docker Deployment Rollback
798
- ```bash
799
- # List previous images
800
- docker images | grep knowledge-assistant
801
-
802
- # Rollback to previous version
803
- docker-compose down
804
- docker tag knowledge-assistant-backend:latest knowledge-assistant-backend:rollback
805
- docker tag knowledge-assistant-backend:previous knowledge-assistant-backend:latest
806
- docker-compose up -d
807
- ```
808
-
809
- #### Platform-Specific Rollbacks
810
-
811
- **Railway:**
812
- ```bash
813
- railway rollback
814
- ```
815
-
816
- **Fly.io:**
817
- ```bash
818
- flyctl releases rollback
819
- ```
820
-
821
- **Google Cloud Run:**
822
- ```bash
823
- gcloud run services update SERVICE_NAME \
824
- --image=gcr.io/PROJECT/IMAGE:PREVIOUS_TAG
825
- ```
826
-
827
- **Vercel:**
828
- ```bash
829
- vercel rollback
830
- ```
831
-
832
- ### Data Recovery
833
-
834
- #### Vector Database Recovery
835
- ```bash
836
- # Backup Qdrant data
837
- tar -czf qdrant-backup-$(date +%Y%m%d).tar.gz data/qdrant/
838
-
839
- # Restore Qdrant data
840
- tar -xzf qdrant-backup-YYYYMMDD.tar.gz -C data/
841
- ```
842
-
843
- #### User Data Recovery
844
- ```bash
845
- # Export user data
846
- sqlite3 data/knowledge_assistant.db ".mode csv" ".output users.csv" "SELECT * FROM users;"
847
-
848
- # Import user data
849
- sqlite3 data/knowledge_assistant.db ".mode csv" ".import users.csv users"
850
- ```
851
-
852
- ## Getting Help
853
-
854
- ### Support Channels
855
-
856
- 1. **Documentation**: Check platform-specific documentation first
857
- 2. **Community Forums**:
858
- - Railway: [Discord](https://discord.gg/railway)
859
- - Fly.io: [Community Forum](https://community.fly.io/)
860
- - Google Cloud: [Stack Overflow](https://stackoverflow.com/questions/tagged/google-cloud-run)
861
- - Vercel: [Discord](https://discord.gg/vercel)
862
-
863
- 3. **Issue Reporting**: Create detailed bug reports with:
864
- - Platform and version information
865
- - Error messages and logs
866
- - Steps to reproduce
867
- - Environment configuration (without secrets)
868
-
869
- ### Diagnostic Information Collection
870
-
871
- ```bash
872
- #!/bin/bash
873
- # collect-diagnostics.sh
874
-
875
- echo "=== System Information ==="
876
- uname -a
877
- docker --version
878
- docker-compose --version
879
-
880
- echo "=== Service Status ==="
881
- docker-compose ps
882
-
883
- echo "=== Resource Usage ==="
884
- free -h
885
- df -h
886
-
887
- echo "=== Recent Logs ==="
888
- docker-compose logs --tail=50
889
-
890
- echo "=== Environment Variables ==="
891
- env | grep -E "(DATABASE|QDRANT|JWT)" | sed 's/=.*/=***/'
892
- ```
893
-
894
- This troubleshooting guide should help you diagnose and resolve most common issues with the Knowledge Assistant RAG application deployment.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cloudbuild.yaml DELETED
@@ -1,146 +0,0 @@
1
- # Cloud Build Configuration for Knowledge Assistant
2
- # This file defines the build pipeline for all services
3
-
4
- steps:
5
- # Build Backend Docker Image
6
- - name: 'gcr.io/cloud-builders/docker'
7
- id: 'build-backend'
8
- args:
9
- - 'build'
10
- - '-t'
11
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-backend:$BUILD_ID'
12
- - '-t'
13
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-backend:latest'
14
- - '-f'
15
- - 'Dockerfile'
16
- - '.'
17
- dir: 'Knowledge_Assistant_RAG'
18
-
19
- # Build Frontend Docker Image
20
- - name: 'gcr.io/cloud-builders/docker'
21
- id: 'build-frontend'
22
- args:
23
- - 'build'
24
- - '-t'
25
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-frontend:$BUILD_ID'
26
- - '-t'
27
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-frontend:latest'
28
- - '-f'
29
- - 'Dockerfile'
30
- - '.'
31
- dir: 'Knowledge_Assistant_RAG/rag-quest-hub'
32
-
33
- # Push Backend Image
34
- - name: 'gcr.io/cloud-builders/docker'
35
- id: 'push-backend'
36
- args:
37
- - 'push'
38
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-backend:$BUILD_ID'
39
- waitFor: ['build-backend']
40
-
41
- # Push Frontend Image
42
- - name: 'gcr.io/cloud-builders/docker'
43
- id: 'push-frontend'
44
- args:
45
- - 'push'
46
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-frontend:$BUILD_ID'
47
- waitFor: ['build-frontend']
48
-
49
- # Push Latest Tags
50
- - name: 'gcr.io/cloud-builders/docker'
51
- id: 'push-backend-latest'
52
- args:
53
- - 'push'
54
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-backend:latest'
55
- waitFor: ['push-backend']
56
-
57
- - name: 'gcr.io/cloud-builders/docker'
58
- id: 'push-frontend-latest'
59
- args:
60
- - 'push'
61
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-frontend:latest'
62
- waitFor: ['push-frontend']
63
-
64
- # Deploy Qdrant Service (using public image)
65
- - name: 'gcr.io/cloud-builders/gcloud'
66
- id: 'deploy-qdrant'
67
- args:
68
- - 'run'
69
- - 'deploy'
70
- - 'knowledge-assistant-qdrant'
71
- - '--image=qdrant/qdrant:latest'
72
- - '--platform=managed'
73
- - '--region=us-central1'
74
- - '--memory=512Mi'
75
- - '--cpu=1'
76
- - '--max-instances=5'
77
- - '--min-instances=1'
78
- - '--port=6333'
79
- - '--service-account=knowledge-assistant-qdrant-sa@$PROJECT_ID.iam.gserviceaccount.com'
80
- - '--set-env-vars=QDRANT__SERVICE__HTTP_PORT=6333,QDRANT__SERVICE__GRPC_PORT=6334'
81
- - '--allow-unauthenticated'
82
- waitFor: ['-']
83
-
84
- # Deploy Backend Service
85
- - name: 'gcr.io/cloud-builders/gcloud'
86
- id: 'deploy-backend'
87
- args:
88
- - 'run'
89
- - 'deploy'
90
- - 'knowledge-assistant-backend'
91
- - '--image=gcr.io/$PROJECT_ID/knowledge-assistant-backend:$BUILD_ID'
92
- - '--platform=managed'
93
- - '--region=us-central1'
94
- - '--memory=1Gi'
95
- - '--cpu=1'
96
- - '--max-instances=10'
97
- - '--min-instances=0'
98
- - '--port=8000'
99
- - '--service-account=knowledge-assistant-backend-sa@$PROJECT_ID.iam.gserviceaccount.com'
100
- - '--add-cloudsql-instances=$PROJECT_ID:us-central1:knowledge-assistant-db'
101
- - '--update-secrets=DATABASE_URL=knowledge-assistant-secrets:DATABASE_URL:latest'
102
- - '--update-secrets=JWT_SECRET=knowledge-assistant-secrets:JWT_SECRET:latest'
103
- - '--update-secrets=GEMINI_API_KEY=knowledge-assistant-secrets:GEMINI_API_KEY:latest'
104
- - '--set-env-vars=QDRANT_HOST=https://knowledge-assistant-qdrant-${_QDRANT_HASH}-uc.a.run.app,QDRANT_PORT=443,PYTHONUNBUFFERED=1,PYTHONDONTWRITEBYTECODE=1'
105
- - '--allow-unauthenticated'
106
- waitFor: ['push-backend-latest', 'deploy-qdrant']
107
-
108
- # Deploy Frontend Service
109
- - name: 'gcr.io/cloud-builders/gcloud'
110
- id: 'deploy-frontend'
111
- args:
112
- - 'run'
113
- - 'deploy'
114
- - 'knowledge-assistant-frontend'
115
- - '--image=gcr.io/$PROJECT_ID/knowledge-assistant-frontend:$BUILD_ID'
116
- - '--platform=managed'
117
- - '--region=us-central1'
118
- - '--memory=512Mi'
119
- - '--cpu=1'
120
- - '--max-instances=10'
121
- - '--min-instances=0'
122
- - '--port=8080'
123
- - '--set-env-vars=VITE_API_BASE_URL=https://knowledge-assistant-backend-${_BACKEND_HASH}-uc.a.run.app'
124
- - '--allow-unauthenticated'
125
- waitFor: ['push-frontend-latest', 'deploy-backend']
126
-
127
- # Build configuration
128
- options:
129
- machineType: 'E2_HIGHCPU_8'
130
- diskSizeGb: 100
131
- logging: CLOUD_LOGGING_ONLY
132
-
133
- # Substitutions for dynamic values
134
- substitutions:
135
- _BACKEND_HASH: 'auto-generated-hash'
136
- _QDRANT_HASH: 'auto-generated-hash'
137
-
138
- # Build timeout
139
- timeout: '1800s' # 30 minutes
140
-
141
- # Images to be pushed to Container Registry
142
- images:
143
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-backend:$BUILD_ID'
144
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-backend:latest'
145
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-frontend:$BUILD_ID'
146
- - 'gcr.io/$PROJECT_ID/knowledge-assistant-frontend:latest'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cloudrun/backend-service.yaml DELETED
@@ -1,88 +0,0 @@
1
- apiVersion: serving.knative.dev/v1
2
- kind: Service
3
- metadata:
4
- name: knowledge-assistant-backend
5
- annotations:
6
- run.googleapis.com/ingress: all
7
- run.googleapis.com/execution-environment: gen2
8
- spec:
9
- template:
10
- metadata:
11
- annotations:
12
- # Resource limits for free tier
13
- run.googleapis.com/memory: "1Gi"
14
- run.googleapis.com/cpu: "1000m"
15
- run.googleapis.com/max-instances: "10"
16
- run.googleapis.com/min-instances: "0"
17
- # Enable CPU allocation only during requests
18
- run.googleapis.com/cpu-throttling: "true"
19
- # Cloud SQL connection (if using Cloud SQL)
20
- run.googleapis.com/cloudsql-instances: "PROJECT_ID:REGION:knowledge-assistant-db"
21
- spec:
22
- containerConcurrency: 80
23
- timeoutSeconds: 900
24
- containers:
25
- - name: backend
26
- image: gcr.io/PROJECT_ID/knowledge-assistant-backend:latest
27
- ports:
28
- - name: http1
29
- containerPort: 8000
30
- env:
31
- - name: DATABASE_URL
32
- valueFrom:
33
- secretKeyRef:
34
- name: knowledge-assistant-secrets
35
- key: DATABASE_URL
36
- - name: JWT_SECRET
37
- valueFrom:
38
- secretKeyRef:
39
- name: knowledge-assistant-secrets
40
- key: JWT_SECRET
41
- - name: QDRANT_HOST
42
- value: "https://knowledge-assistant-qdrant-HASH-uc.a.run.app"
43
- - name: QDRANT_PORT
44
- value: "443"
45
- - name: GEMINI_API_KEY
46
- valueFrom:
47
- secretKeyRef:
48
- name: knowledge-assistant-secrets
49
- key: GEMINI_API_KEY
50
- - name: CORS_ORIGINS
51
- value: "https://knowledge-assistant-frontend-HASH-uc.a.run.app"
52
- - name: JWT_LIFETIME_SECONDS
53
- value: "3600"
54
- - name: USER_REGISTRATION_ENABLED
55
- value: "true"
56
- - name: EMAIL_VERIFICATION_REQUIRED
57
- value: "false"
58
- - name: PYTHONUNBUFFERED
59
- value: "1"
60
- - name: PYTHONDONTWRITEBYTECODE
61
- value: "1"
62
- resources:
63
- limits:
64
- memory: "1Gi"
65
- cpu: "1000m"
66
- volumeMounts:
67
- - name: data-volume
68
- mountPath: /app/data
69
- livenessProbe:
70
- httpGet:
71
- path: /health
72
- port: 8000
73
- initialDelaySeconds: 30
74
- periodSeconds: 30
75
- timeoutSeconds: 10
76
- readinessProbe:
77
- httpGet:
78
- path: /health
79
- port: 8000
80
- initialDelaySeconds: 10
81
- periodSeconds: 10
82
- timeoutSeconds: 5
83
- volumes:
84
- - name: data-volume
85
- emptyDir: {}
86
- traffic:
87
- - percent: 100
88
- latestRevision: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cloudrun/cloudrun-config.yaml DELETED
@@ -1,95 +0,0 @@
1
- # Complete Cloud Run Configuration for Knowledge Assistant
2
- # This file contains all the necessary configurations for deploying to Google Cloud Run
3
-
4
- # Project Configuration
5
- PROJECT_ID: "your-gcp-project-id"
6
- REGION: "us-central1"
7
- SERVICES:
8
- - name: "knowledge-assistant-frontend"
9
- image: "gcr.io/PROJECT_ID/knowledge-assistant-frontend"
10
- port: 8080
11
- memory: "512Mi"
12
- cpu: "1000m"
13
- max_instances: 10
14
- min_instances: 0
15
-
16
- - name: "knowledge-assistant-backend"
17
- image: "gcr.io/PROJECT_ID/knowledge-assistant-backend"
18
- port: 8000
19
- memory: "1Gi"
20
- cpu: "1000m"
21
- max_instances: 10
22
- min_instances: 0
23
- service_account: "knowledge-assistant-backend-sa@PROJECT_ID.iam.gserviceaccount.com"
24
-
25
- - name: "knowledge-assistant-qdrant"
26
- image: "qdrant/qdrant:latest"
27
- port: 6333
28
- memory: "512Mi"
29
- cpu: "1000m"
30
- max_instances: 5
31
- min_instances: 1
32
- service_account: "knowledge-assistant-qdrant-sa@PROJECT_ID.iam.gserviceaccount.com"
33
-
34
- # Environment Variables Configuration
35
- ENVIRONMENT_VARIABLES:
36
- frontend:
37
- VITE_API_BASE_URL: "https://knowledge-assistant-backend-HASH-uc.a.run.app"
38
- VITE_API_TIMEOUT: "30000"
39
- VITE_ENABLE_REGISTRATION: "true"
40
-
41
- backend:
42
- QDRANT_HOST: "https://knowledge-assistant-qdrant-HASH-uc.a.run.app"
43
- QDRANT_PORT: "443"
44
- CORS_ORIGINS: "https://knowledge-assistant-frontend-HASH-uc.a.run.app"
45
- JWT_LIFETIME_SECONDS: "3600"
46
- USER_REGISTRATION_ENABLED: "true"
47
- EMAIL_VERIFICATION_REQUIRED: "false"
48
- PYTHONUNBUFFERED: "1"
49
- PYTHONDONTWRITEBYTECODE: "1"
50
-
51
- qdrant:
52
- QDRANT__SERVICE__HTTP_PORT: "6333"
53
- QDRANT__SERVICE__GRPC_PORT: "6334"
54
- QDRANT__STORAGE__STORAGE_PATH: "/qdrant/storage"
55
-
56
- # Secret Environment Variables (stored in Secret Manager)
57
- SECRET_VARIABLES:
58
- backend:
59
- - name: "DATABASE_URL"
60
- secret: "knowledge-assistant-secrets"
61
- key: "DATABASE_URL"
62
- - name: "JWT_SECRET"
63
- secret: "knowledge-assistant-secrets"
64
- key: "JWT_SECRET"
65
- - name: "GEMINI_API_KEY"
66
- secret: "knowledge-assistant-secrets"
67
- key: "GEMINI_API_KEY"
68
-
69
- # Cloud SQL Configuration
70
- CLOUD_SQL:
71
- instance_name: "knowledge-assistant-db"
72
- database_name: "knowledge-assistant-main-db"
73
- user_name: "knowledge-assistant-user"
74
- region: "us-central1"
75
- tier: "db-f1-micro"
76
- disk_size: 10
77
-
78
- # IAM Configuration
79
- SERVICE_ACCOUNTS:
80
- - name: "knowledge-assistant-backend-sa"
81
- roles:
82
- - "roles/cloudsql.client"
83
- - "roles/secretmanager.secretAccessor"
84
- - "roles/run.invoker"
85
-
86
- - name: "knowledge-assistant-qdrant-sa"
87
- roles:
88
- - "roles/run.invoker"
89
-
90
- # Resource Limits (Free Tier Optimized)
91
- RESOURCE_LIMITS:
92
- total_memory: "2Gi" # Total across all services
93
- total_cpu: "3000m" # Total across all services
94
- max_requests_per_minute: 1000
95
- max_concurrent_requests: 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cloudrun/cloudsql-config.yaml DELETED
@@ -1,61 +0,0 @@
1
- # Cloud SQL Configuration for Knowledge Assistant
2
- # This file defines the Cloud SQL instance and database configuration
3
-
4
- apiVersion: sql.cnrm.cloud.google.com/v1beta1
5
- kind: SQLInstance
6
- metadata:
7
- name: knowledge-assistant-db
8
- spec:
9
- databaseVersion: POSTGRES_15
10
- region: us-central1
11
- settings:
12
- tier: db-f1-micro # Free tier eligible
13
- availabilityType: ZONAL
14
- diskSize: 10 # GB - minimum for free tier
15
- diskType: PD_HDD
16
- diskAutoresize: true
17
- diskAutoresizeLimit: 20 # GB - stay within free tier limits
18
- backupConfiguration:
19
- enabled: true
20
- startTime: "03:00" # 3 AM UTC
21
- retainedBackups: 7
22
- transactionLogRetentionDays: 7
23
- ipConfiguration:
24
- ipv4Enabled: true
25
- authorizedNetworks: [] # No networks authorized; Cloud Run connects through the Cloud SQL connector (/cloudsql socket)
26
- requireSsl: true
27
- maintenanceWindow:
28
- day: 7 # Sunday
29
- hour: 4 # 4 AM UTC
30
- updateTrack: stable
31
- userLabels:
32
- app: knowledge-assistant
33
- environment: production
34
- tier: free
35
-
36
- ---
37
-
38
- apiVersion: sql.cnrm.cloud.google.com/v1beta1
39
- kind: SQLDatabase
40
- metadata:
41
- name: knowledge-assistant-main-db
42
- spec:
43
- charset: UTF8
44
- collation: en_US.UTF8
45
- instanceRef:
46
- name: knowledge-assistant-db
47
-
48
- ---
49
-
50
- apiVersion: sql.cnrm.cloud.google.com/v1beta1
51
- kind: SQLUser
52
- metadata:
53
- name: knowledge-assistant-user
54
- spec:
55
- instanceRef:
56
- name: knowledge-assistant-db
57
- password:
58
- valueFrom:
59
- secretKeyRef:
60
- name: knowledge-assistant-secrets
61
- key: DB_PASSWORD
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cloudrun/frontend-service.yaml DELETED
@@ -1,55 +0,0 @@
1
- apiVersion: serving.knative.dev/v1
2
- kind: Service
3
- metadata:
4
- name: knowledge-assistant-frontend
5
- annotations:
6
- run.googleapis.com/ingress: all
7
- run.googleapis.com/execution-environment: gen2
8
- spec:
9
- template:
10
- metadata:
11
- annotations:
12
- # Resource limits for free tier
13
- run.googleapis.com/memory: "512Mi"
14
- run.googleapis.com/cpu: "1000m"
15
- run.googleapis.com/max-instances: "10"
16
- run.googleapis.com/min-instances: "0"
17
- # Enable CPU allocation only during requests
18
- run.googleapis.com/cpu-throttling: "true"
19
- spec:
20
- containerConcurrency: 80
21
- timeoutSeconds: 300
22
- containers:
23
- - name: frontend
24
- image: gcr.io/PROJECT_ID/knowledge-assistant-frontend:latest
25
- ports:
26
- - name: http1
27
- containerPort: 8080
28
- env:
29
- - name: VITE_API_BASE_URL
30
- value: "https://knowledge-assistant-backend-HASH-uc.a.run.app"
31
- - name: VITE_API_TIMEOUT
32
- value: "30000"
33
- - name: VITE_ENABLE_REGISTRATION
34
- value: "true"
35
- resources:
36
- limits:
37
- memory: "512Mi"
38
- cpu: "1000m"
39
- livenessProbe:
40
- httpGet:
41
- path: /
42
- port: 8080
43
- initialDelaySeconds: 10
44
- periodSeconds: 30
45
- timeoutSeconds: 5
46
- readinessProbe:
47
- httpGet:
48
- path: /
49
- port: 8080
50
- initialDelaySeconds: 5
51
- periodSeconds: 10
52
- timeoutSeconds: 3
53
- traffic:
54
- - percent: 100
55
- latestRevision: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cloudrun/iam-config.yaml DELETED
@@ -1,84 +0,0 @@
1
- # IAM Configuration for Cloud Run Services
2
- # This file defines the service accounts and IAM roles needed for the Knowledge Assistant application
3
-
4
- # Service Account for Backend Service
5
- apiVersion: iam.cnrm.cloud.google.com/v1beta1
6
- kind: IAMServiceAccount
7
- metadata:
8
- name: knowledge-assistant-backend-sa
9
- namespace: default
10
- spec:
11
- displayName: "Knowledge Assistant Backend Service Account"
12
- description: "Service account for Knowledge Assistant backend with minimal required permissions"
13
-
14
- ---
15
-
16
- # Service Account for Qdrant Service
17
- apiVersion: iam.cnrm.cloud.google.com/v1beta1
18
- kind: IAMServiceAccount
19
- metadata:
20
- name: knowledge-assistant-qdrant-sa
21
- namespace: default
22
- spec:
23
- displayName: "Knowledge Assistant Qdrant Service Account"
24
- description: "Service account for Qdrant vector database service"
25
-
26
- ---
27
-
28
- # IAM Policy Binding for Backend Service Account - Cloud SQL Client
29
- apiVersion: iam.cnrm.cloud.google.com/v1beta1
30
- kind: IAMPolicyMember
31
- metadata:
32
- name: backend-cloudsql-client
33
- spec:
34
- member: serviceAccount:knowledge-assistant-backend-sa@PROJECT_ID.iam.gserviceaccount.com
35
- role: roles/cloudsql.client
36
- resourceRef:
37
- apiVersion: resourcemanager.cnrm.cloud.google.com/v1beta1
38
- kind: Project
39
- external: PROJECT_ID
40
-
41
- ---
42
-
43
- # IAM Policy Binding for Backend Service Account - Secret Manager Accessor
44
- apiVersion: iam.cnrm.cloud.google.com/v1beta1
45
- kind: IAMPolicyMember
46
- metadata:
47
- name: backend-secret-accessor
48
- spec:
49
- member: serviceAccount:knowledge-assistant-backend-sa@PROJECT_ID.iam.gserviceaccount.com
50
- role: roles/secretmanager.secretAccessor
51
- resourceRef:
52
- apiVersion: resourcemanager.cnrm.cloud.google.com/v1beta1
53
- kind: Project
54
- external: PROJECT_ID
55
-
56
- ---
57
-
58
- # IAM Policy Binding for Backend Service Account - Cloud Run Invoker (for internal service communication)
59
- apiVersion: iam.cnrm.cloud.google.com/v1beta1
60
- kind: IAMPolicyMember
61
- metadata:
62
- name: backend-run-invoker
63
- spec:
64
- member: serviceAccount:knowledge-assistant-backend-sa@PROJECT_ID.iam.gserviceaccount.com
65
- role: roles/run.invoker
66
- resourceRef:
67
- apiVersion: resourcemanager.cnrm.cloud.google.com/v1beta1
68
- kind: Project
69
- external: PROJECT_ID
70
-
71
- ---
72
-
73
- # IAM Policy Binding for Qdrant Service Account - Basic Cloud Run permissions
74
- apiVersion: iam.cnrm.cloud.google.com/v1beta1
75
- kind: IAMPolicyMember
76
- metadata:
77
- name: qdrant-run-invoker
78
- spec:
79
- member: serviceAccount:knowledge-assistant-qdrant-sa@PROJECT_ID.iam.gserviceaccount.com
80
- role: roles/run.invoker
81
- resourceRef:
82
- apiVersion: resourcemanager.cnrm.cloud.google.com/v1beta1
83
- kind: Project
84
- external: PROJECT_ID
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cloudrun/qdrant-service.yaml DELETED
@@ -1,61 +0,0 @@
1
- apiVersion: serving.knative.dev/v1
2
- kind: Service
3
- metadata:
4
- name: knowledge-assistant-qdrant
5
- annotations:
6
- run.googleapis.com/ingress: all
7
- run.googleapis.com/execution-environment: gen2
8
- spec:
9
- template:
10
- metadata:
11
- annotations:
12
- # Resource limits for free tier
13
- run.googleapis.com/memory: "512Mi"
14
- run.googleapis.com/cpu: "1000m"
15
- run.googleapis.com/max-instances: "5"
16
- run.googleapis.com/min-instances: "1"
17
- # Keep at least one instance warm for vector database
18
- run.googleapis.com/cpu-throttling: "false"
19
- spec:
20
- containerConcurrency: 50
21
- timeoutSeconds: 300
22
- containers:
23
- - name: qdrant
24
- image: qdrant/qdrant:latest
25
- ports:
26
- - name: http1
27
- containerPort: 6333
28
- env:
29
- - name: QDRANT__SERVICE__HTTP_PORT
30
- value: "6333"
31
- - name: QDRANT__SERVICE__GRPC_PORT
32
- value: "6334"
33
- - name: QDRANT__STORAGE__STORAGE_PATH
34
- value: "/qdrant/storage"
35
- resources:
36
- limits:
37
- memory: "512Mi"
38
- cpu: "1000m"
39
- volumeMounts:
40
- - name: qdrant-storage
41
- mountPath: /qdrant/storage
42
- livenessProbe:
43
- httpGet:
44
- path: /health
45
- port: 6333
46
- initialDelaySeconds: 30
47
- periodSeconds: 30
48
- timeoutSeconds: 10
49
- readinessProbe:
50
- httpGet:
51
- path: /health
52
- port: 6333
53
- initialDelaySeconds: 10
54
- periodSeconds: 10
55
- timeoutSeconds: 5
56
- volumes:
57
- - name: qdrant-storage
58
- emptyDir: {}
59
- traffic:
60
- - percent: 100
61
- latestRevision: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cloudrun/secrets-config.yaml DELETED
@@ -1,34 +0,0 @@
1
- # Secret Manager Configuration for Knowledge Assistant
2
- # This file defines the secrets needed for the application
3
-
4
- apiVersion: secretmanager.cnrm.cloud.google.com/v1beta1
5
- kind: SecretManagerSecret
6
- metadata:
7
- name: knowledge-assistant-secrets
8
- spec:
9
- secretId: knowledge-assistant-secrets
10
- replication:
11
- automatic: true
12
- labels:
13
- app: knowledge-assistant
14
- environment: production
15
-
16
- ---
17
-
18
- # Secret versions will be created separately via gcloud CLI or console
19
- # The following secrets need to be stored:
20
- # - JWT_SECRET: A secure random string for JWT token signing
21
- # - DATABASE_URL: PostgreSQL connection string for Cloud SQL
22
- # - GEMINI_API_KEY: Google Gemini API key for LLM functionality
23
- # - DB_PASSWORD: Database password for the Cloud SQL user
24
-
25
- # Example secret creation commands (to be run after deployment):
26
- # gcloud secrets versions add knowledge-assistant-secrets --data-file=secrets.json
27
- #
28
- # Where secrets.json contains:
29
- # {
30
- # "JWT_SECRET": "your-super-secure-jwt-secret-key-change-this-in-production",
31
- # "DATABASE_URL": "postgresql+asyncpg://knowledge-assistant-user:PASSWORD@/knowledge-assistant-main-db?host=/cloudsql/PROJECT_ID:us-central1:knowledge-assistant-db",
32
- # "GEMINI_API_KEY": "your-gemini-api-key",
33
- # "DB_PASSWORD": "your-secure-database-password"
34
- # }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
create-test-user.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to create a test user for Railway deployment
4
+ """
5
+ import asyncio
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ # Add this script's directory (the project root) to the Python path so the `src` package can be imported
11
+ sys.path.insert(0, str(Path(__file__).parent))
12
+
13
+ from src.core.database import get_async_session, create_db_and_tables
14
+ from src.core.auth import get_user_manager
15
+ from src.models.user import UserCreate
16
+ from fastapi_users.exceptions import UserAlreadyExists
17
+
18
+ async def create_test_user():
19
+ """Create a test user for the application"""
20
+
21
+ # Ensure database is initialized
22
+ await create_db_and_tables()
23
+
24
+ # Get database session
25
+ async for session in get_async_session():
26
+ try:
27
+ # Get user manager
28
+ user_manager = get_user_manager()
29
+
30
+ # Create test user
31
+ user_create = UserCreate(
32
+ email="demo@example.com",
33
+ password="demopassword",
34
+ is_verified=True
35
+ )
36
+
37
+ # Try to create the user
38
+ try:
39
+ user = await user_manager.create(user_create)
40
+ print(f"✅ Test user created successfully: {user.email}")
41
+ print(f" User ID: {user.id}")
42
+ print(f" Is Active: {user.is_active}")
43
+ print(f" Is Verified: {user.is_verified}")
44
+
45
+ except UserAlreadyExists:
46
+ print("ℹ️ Test user already exists: demo@example.com")
47
+
48
+ # Try to get the existing user
49
+ existing_user = await user_manager.get_by_email("demo@example.com")
50
+ print(f" User ID: {existing_user.id}")
51
+ print(f" Is Active: {existing_user.is_active}")
52
+ print(f" Is Verified: {existing_user.is_verified}")
53
+
54
+ except Exception as e:
55
+ print(f"❌ Error creating test user: {e}")
56
+ import traceback
57
+ traceback.print_exc()
58
+
59
+ break # Exit after first session
60
+
61
+ if __name__ == "__main__":
62
+ # Set environment variables if not set
63
+ if not os.getenv("DATABASE_URL"):
64
+ os.environ["DATABASE_URL"] = "sqlite+aiosqlite:///./data/knowledge_assistant.db"
65
+
66
+ if not os.getenv("JWT_SECRET"):
67
+ os.environ["JWT_SECRET"] = "your-secret-key-here-change-in-production"
68
+
69
+ # Create data directory
70
+ os.makedirs("data", exist_ok=True)
71
+
72
+ print("Creating test user for Railway deployment...")
73
+ asyncio.run(create_test_user())
deploy-cloudrun.sh DELETED
@@ -1,422 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Cloud Run Deployment Script for Knowledge Assistant
4
- # This script automates the deployment of the Knowledge Assistant application to Google Cloud Run
5
-
6
- set -e # Exit on any error
7
-
8
- # Colors for output
9
- RED='\033[0;31m'
10
- GREEN='\033[0;32m'
11
- YELLOW='\033[1;33m'
12
- BLUE='\033[0;34m'
13
- NC='\033[0m' # No Color
14
-
15
- # Configuration
16
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
17
- ENV_FILE="${SCRIPT_DIR}/.env.cloudrun"
18
- REGION="us-central1"
19
-
20
- # Function to print colored output
21
- print_status() {
22
- echo -e "${BLUE}[INFO]${NC} $1"
23
- }
24
-
25
- print_success() {
26
- echo -e "${GREEN}[SUCCESS]${NC} $1"
27
- }
28
-
29
- print_warning() {
30
- echo -e "${YELLOW}[WARNING]${NC} $1"
31
- }
32
-
33
- print_error() {
34
- echo -e "${RED}[ERROR]${NC} $1"
35
- }
36
-
37
- # Function to check if required tools are installed
38
- check_prerequisites() {
39
- print_status "Checking prerequisites..."
40
-
41
- if ! command -v gcloud &> /dev/null; then
42
- print_error "gcloud CLI is not installed. Please install it from https://cloud.google.com/sdk/docs/install"
43
- exit 1
44
- fi
45
-
46
- if ! command -v docker &> /dev/null; then
47
- print_error "Docker is not installed. Please install Docker first."
48
- exit 1
49
- fi
50
-
51
- print_success "Prerequisites check passed"
52
- }
53
-
54
- # Function to load environment variables
55
- load_environment() {
56
- if [[ -f "$ENV_FILE" ]]; then
57
- print_status "Loading environment variables from $ENV_FILE"
58
- source "$ENV_FILE"
59
- else
60
- print_error "Environment file $ENV_FILE not found. Please copy .env.cloudrun.template to .env.cloudrun and configure it."
61
- exit 1
62
- fi
63
-
64
- # Validate required variables
65
- if [[ -z "$PROJECT_ID" ]]; then
66
- print_error "PROJECT_ID is not set in environment file"
67
- exit 1
68
- fi
69
-
70
- print_success "Environment variables loaded"
71
- }
72
-
73
- # Function to set the active gcloud project and enable required APIs
74
- setup_gcloud() {
75
- print_status "Setting up gcloud configuration..."
76
-
77
- # Set the project
78
- gcloud config set project "$PROJECT_ID"
79
-
80
- # Enable required APIs
81
- print_status "Enabling required Google Cloud APIs..."
82
- gcloud services enable \
83
- cloudbuild.googleapis.com \
84
- run.googleapis.com \
85
- containerregistry.googleapis.com \
86
- sqladmin.googleapis.com \
87
- secretmanager.googleapis.com \
88
- iam.googleapis.com
89
-
90
- print_success "gcloud setup completed"
91
- }
92
-
93
- # Function to create secrets
94
- create_secrets() {
95
- print_status "Creating secrets in Secret Manager..."
96
-
97
- # Check if secret already exists
98
- if gcloud secrets describe knowledge-assistant-secrets &>/dev/null; then
99
- print_warning "Secret knowledge-assistant-secrets already exists, skipping creation"
100
- else
101
- gcloud secrets create knowledge-assistant-secrets --replication-policy="automatic"
102
- print_success "Created secret: knowledge-assistant-secrets"
103
- fi
104
-
105
- # Create temporary secrets file
106
- cat > /tmp/secrets.json << EOF
107
- {
108
- "JWT_SECRET": "${JWT_SECRET}",
109
- "DATABASE_URL": "${DATABASE_URL}",
110
- "GEMINI_API_KEY": "${GEMINI_API_KEY}"
111
- }
112
- EOF
113
-
114
- # Add secret version
115
- gcloud secrets versions add knowledge-assistant-secrets --data-file=/tmp/secrets.json
116
-
117
- # Clean up temporary file
118
- rm /tmp/secrets.json
119
-
120
- print_success "Secrets created and configured"
121
- }
122
-
123
- # Function to create service accounts
124
- create_service_accounts() {
125
- print_status "Creating service accounts..."
126
-
127
- # Backend service account
128
- if gcloud iam service-accounts describe "knowledge-assistant-backend-sa@${PROJECT_ID}.iam.gserviceaccount.com" &>/dev/null; then
129
- print_warning "Backend service account already exists, skipping creation"
130
- else
131
- gcloud iam service-accounts create knowledge-assistant-backend-sa \
132
- --display-name="Knowledge Assistant Backend Service Account" \
133
- --description="Service account for Knowledge Assistant backend"
134
- print_success "Created backend service account"
135
- fi
136
-
137
- # Qdrant service account
138
- if gcloud iam service-accounts describe "knowledge-assistant-qdrant-sa@${PROJECT_ID}.iam.gserviceaccount.com" &>/dev/null; then
139
- print_warning "Qdrant service account already exists, skipping creation"
140
- else
141
- gcloud iam service-accounts create knowledge-assistant-qdrant-sa \
142
- --display-name="Knowledge Assistant Qdrant Service Account" \
143
- --description="Service account for Qdrant vector database"
144
- print_success "Created qdrant service account"
145
- fi
146
-
147
- # Grant IAM roles
148
- print_status "Granting IAM roles..."
149
-
150
- gcloud projects add-iam-policy-binding "$PROJECT_ID" \
151
- --member="serviceAccount:knowledge-assistant-backend-sa@${PROJECT_ID}.iam.gserviceaccount.com" \
152
- --role="roles/cloudsql.client"
153
-
154
- gcloud projects add-iam-policy-binding "$PROJECT_ID" \
155
- --member="serviceAccount:knowledge-assistant-backend-sa@${PROJECT_ID}.iam.gserviceaccount.com" \
156
- --role="roles/secretmanager.secretAccessor"
157
-
158
- gcloud projects add-iam-policy-binding "$PROJECT_ID" \
159
- --member="serviceAccount:knowledge-assistant-backend-sa@${PROJECT_ID}.iam.gserviceaccount.com" \
160
- --role="roles/run.invoker"
161
-
162
- print_success "Service accounts and IAM roles configured"
163
- }
164
-
165
- # Function to create Cloud SQL instance
166
- create_cloud_sql() {
167
- print_status "Creating Cloud SQL instance..."
168
-
169
- # Check if instance already exists
170
- if gcloud sql instances describe knowledge-assistant-db &>/dev/null; then
171
- print_warning "Cloud SQL instance already exists, skipping creation"
172
- else
173
- gcloud sql instances create knowledge-assistant-db \
174
- --database-version=POSTGRES_15 \
175
- --tier=db-f1-micro \
176
- --region="$REGION" \
177
- --storage-type=HDD \
178
- --storage-size=10GB \
179
- --storage-auto-increase \
180
- --storage-auto-increase-limit=20GB \
181
- --backup-start-time=03:00 \
182
- --maintenance-window-day=SUN \
183
- --maintenance-window-hour=04 \
184
- --maintenance-release-channel=production
185
-
186
- print_success "Created Cloud SQL instance"
187
- fi
188
-
189
- # Create database
190
- if gcloud sql databases describe knowledge-assistant-main-db --instance=knowledge-assistant-db &>/dev/null; then
191
- print_warning "Database already exists, skipping creation"
192
- else
193
- gcloud sql databases create knowledge-assistant-main-db --instance=knowledge-assistant-db
194
- print_success "Created database"
195
- fi
196
-
197
- # Create user (password will be generated)
198
- DB_PASSWORD=$(openssl rand -base64 32)
199
- if gcloud sql users describe knowledge-assistant-user --instance=knowledge-assistant-db &>/dev/null; then
200
- print_warning "Database user already exists, updating password"
201
- gcloud sql users set-password knowledge-assistant-user \
202
- --instance=knowledge-assistant-db \
203
- --password="$DB_PASSWORD"
204
- else
205
- gcloud sql users create knowledge-assistant-user \
206
- --instance=knowledge-assistant-db \
207
- --password="$DB_PASSWORD"
208
- print_success "Created database user"
209
- fi
210
-
211
- # Update DATABASE_URL in secrets
212
- CONNECTION_NAME="${PROJECT_ID}:${REGION}:knowledge-assistant-db"
213
- NEW_DATABASE_URL="postgresql+asyncpg://knowledge-assistant-user:${DB_PASSWORD}@/knowledge-assistant-main-db?host=/cloudsql/${CONNECTION_NAME}"
214
-
215
- # Update secrets with new database URL
216
- cat > /tmp/secrets.json << EOF
217
- {
218
- "JWT_SECRET": "${JWT_SECRET}",
219
- "DATABASE_URL": "${NEW_DATABASE_URL}",
220
- "GEMINI_API_KEY": "${GEMINI_API_KEY}"
221
- }
222
- EOF
223
-
224
- gcloud secrets versions add knowledge-assistant-secrets --data-file=/tmp/secrets.json
225
- rm /tmp/secrets.json
226
-
227
- print_success "Cloud SQL setup completed"
228
- }
229
-
230
- # Function to build and push Docker images
231
- build_and_push_images() {
232
- print_status "Building and pushing Docker images..."
233
-
234
- # Build backend image
235
- print_status "Building backend image..."
236
- docker build -t "gcr.io/${PROJECT_ID}/knowledge-assistant-backend:latest" \
237
- -f "${SCRIPT_DIR}/Dockerfile" "${SCRIPT_DIR}"
238
-
239
- # Build frontend image
240
- print_status "Building frontend image..."
241
- docker build -t "gcr.io/${PROJECT_ID}/knowledge-assistant-frontend:latest" \
242
- -f "${SCRIPT_DIR}/rag-quest-hub/Dockerfile" "${SCRIPT_DIR}/rag-quest-hub"
243
-
244
- # Configure Docker for GCR
245
- gcloud auth configure-docker
246
-
247
- # Push images
248
- print_status "Pushing backend image..."
249
- docker push "gcr.io/${PROJECT_ID}/knowledge-assistant-backend:latest"
250
-
251
- print_status "Pushing frontend image..."
252
- docker push "gcr.io/${PROJECT_ID}/knowledge-assistant-frontend:latest"
253
-
254
- print_success "Docker images built and pushed"
255
- }
256
-
257
- # Function to deploy services
258
- deploy_services() {
259
- print_status "Deploying services to Cloud Run..."
260
-
261
- # Deploy Qdrant service first
262
- print_status "Deploying Qdrant service..."
263
- gcloud run deploy knowledge-assistant-qdrant \
264
- --image=qdrant/qdrant:latest \
265
- --platform=managed \
266
- --region="$REGION" \
267
- --memory=512Mi \
268
- --cpu=1 \
269
- --max-instances=5 \
270
- --min-instances=1 \
271
- --port=6333 \
272
- --service-account="knowledge-assistant-qdrant-sa@${PROJECT_ID}.iam.gserviceaccount.com" \
273
- --set-env-vars="QDRANT__SERVICE__HTTP_PORT=6333,QDRANT__SERVICE__GRPC_PORT=6334" \
274
- --allow-unauthenticated
275
-
276
- # Get Qdrant service URL
277
- QDRANT_URL=$(gcloud run services describe knowledge-assistant-qdrant --region="$REGION" --format="value(status.url)")
278
- print_success "Qdrant deployed at: $QDRANT_URL"
279
-
280
- # Deploy backend service
281
- print_status "Deploying backend service..."
282
- gcloud run deploy knowledge-assistant-backend \
283
- --image="gcr.io/${PROJECT_ID}/knowledge-assistant-backend:latest" \
284
- --platform=managed \
285
- --region="$REGION" \
286
- --memory=1Gi \
287
- --cpu=1 \
288
- --max-instances=10 \
289
- --min-instances=0 \
290
- --port=8000 \
291
- --service-account="knowledge-assistant-backend-sa@${PROJECT_ID}.iam.gserviceaccount.com" \
292
- --add-cloudsql-instances="${PROJECT_ID}:${REGION}:knowledge-assistant-db" \
293
- --update-secrets="DATABASE_URL=knowledge-assistant-secrets:DATABASE_URL:latest" \
294
- --update-secrets="JWT_SECRET=knowledge-assistant-secrets:JWT_SECRET:latest" \
295
- --update-secrets="GEMINI_API_KEY=knowledge-assistant-secrets:GEMINI_API_KEY:latest" \
296
- --set-env-vars="QDRANT_HOST=${QDRANT_URL},QDRANT_PORT=443,PYTHONUNBUFFERED=1,PYTHONDONTWRITEBYTECODE=1,USER_REGISTRATION_ENABLED=true,EMAIL_VERIFICATION_REQUIRED=false,JWT_LIFETIME_SECONDS=3600" \
297
- --allow-unauthenticated
298
-
299
- # Get backend service URL
300
- BACKEND_URL=$(gcloud run services describe knowledge-assistant-backend --region="$REGION" --format="value(status.url)")
301
- print_success "Backend deployed at: $BACKEND_URL"
302
-
303
- # Deploy frontend service
304
- print_status "Deploying frontend service..."
305
- gcloud run deploy knowledge-assistant-frontend \
306
- --image="gcr.io/${PROJECT_ID}/knowledge-assistant-frontend:latest" \
307
- --platform=managed \
308
- --region="$REGION" \
309
- --memory=512Mi \
310
- --cpu=1 \
311
- --max-instances=10 \
312
- --min-instances=0 \
313
- --port=8080 \
314
- --set-env-vars="VITE_API_BASE_URL=${BACKEND_URL},VITE_API_TIMEOUT=30000,VITE_ENABLE_REGISTRATION=true" \
315
- --allow-unauthenticated
316
-
317
- # Get frontend service URL
318
- FRONTEND_URL=$(gcloud run services describe knowledge-assistant-frontend --region="$REGION" --format="value(status.url)")
319
- print_success "Frontend deployed at: $FRONTEND_URL"
320
-
321
- # Update backend CORS settings
322
- print_status "Updating backend CORS settings..."
323
- gcloud run services update knowledge-assistant-backend \
324
- --region="$REGION" \
325
- --update-env-vars="CORS_ORIGINS=${FRONTEND_URL}"
326
-
327
- print_success "All services deployed successfully!"
328
-
329
- # Display deployment summary
330
- echo ""
331
- echo "=== DEPLOYMENT SUMMARY ==="
332
- echo "Frontend URL: $FRONTEND_URL"
333
- echo "Backend URL: $BACKEND_URL"
334
- echo "Qdrant URL: $QDRANT_URL"
335
- echo "=========================="
336
- }
337
-
338
- # Function to run health checks
339
- run_health_checks() {
340
- print_status "Running health checks..."
341
-
342
- # Get service URLs
343
- FRONTEND_URL=$(gcloud run services describe knowledge-assistant-frontend --region="$REGION" --format="value(status.url)")
344
- BACKEND_URL=$(gcloud run services describe knowledge-assistant-backend --region="$REGION" --format="value(status.url)")
345
- QDRANT_URL=$(gcloud run services describe knowledge-assistant-qdrant --region="$REGION" --format="value(status.url)")
346
-
347
- # Check Qdrant health
348
- print_status "Checking Qdrant health..."
349
- if curl -f "${QDRANT_URL}/health" &>/dev/null; then
350
- print_success "Qdrant is healthy"
351
- else
352
- print_warning "Qdrant health check failed"
353
- fi
354
-
355
- # Check backend health
356
- print_status "Checking backend health..."
357
- if curl -f "${BACKEND_URL}/health" &>/dev/null; then
358
- print_success "Backend is healthy"
359
- else
360
- print_warning "Backend health check failed"
361
- fi
362
-
363
- # Check frontend
364
- print_status "Checking frontend..."
365
- if curl -f "$FRONTEND_URL" &>/dev/null; then
366
- print_success "Frontend is accessible"
367
- else
368
- print_warning "Frontend accessibility check failed"
369
- fi
370
-
371
- print_success "Health checks completed"
372
- }
373
-
374
- # Main deployment function
375
- main() {
376
- print_status "Starting Cloud Run deployment for Knowledge Assistant..."
377
-
378
- check_prerequisites
379
- load_environment
380
- setup_gcloud
381
- create_secrets
382
- create_service_accounts
383
- create_cloud_sql
384
- build_and_push_images
385
- deploy_services
386
- run_health_checks
387
-
388
- print_success "Deployment completed successfully!"
389
- print_status "You can now access your application at the frontend URL shown above."
390
- }
391
-
392
- # Handle script arguments
393
- case "${1:-}" in
394
- "secrets")
395
- load_environment
396
- create_secrets
397
- ;;
398
- "build")
399
- load_environment
400
- build_and_push_images
401
- ;;
402
- "deploy")
403
- load_environment
404
- deploy_services
405
- ;;
406
- "health")
407
- load_environment
408
- run_health_checks
409
- ;;
410
- "")
411
- main
412
- ;;
413
- *)
414
- echo "Usage: $0 [secrets|build|deploy|health]"
415
- echo " secrets - Create secrets only"
416
- echo " build - Build and push images only"
417
- echo " deploy - Deploy services only"
418
- echo " health - Run health checks only"
419
- echo " (no args) - Run full deployment"
420
- exit 1
421
- ;;
422
- esac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
deploy-railway.sh CHANGED
@@ -111,7 +111,12 @@ setup_railway_project() {
111
  case $choice in
112
  1)
113
  log "Creating new Railway project..."
114
- railway new "$PROJECT_NAME"
 
 
 
 
 
115
  ;;
116
  2)
117
  log "Connecting to existing Railway project..."
@@ -130,16 +135,15 @@ setup_railway_project() {
130
  deploy_backend() {
131
  log "Deploying backend service..."
132
 
133
- # Check if backend service exists
134
- if ! railway service list | grep -q "$BACKEND_SERVICE"; then
135
- log "Creating backend service..."
136
- railway service create "$BACKEND_SERVICE"
137
- fi
138
 
139
- # Switch to backend service
140
- railway service use "$BACKEND_SERVICE"
 
141
 
142
- # Set environment variables
143
  log "Setting backend environment variables..."
144
  source .env.railway
145
 
@@ -157,23 +161,18 @@ deploy_backend() {
157
  railway variables set QDRANT_CLOUD_URL="$QDRANT_CLOUD_URL"
158
  railway variables set QDRANT_API_KEY="$QDRANT_API_KEY"
159
  else
160
- railway variables set QDRANT_HOST="$QDRANT_HOST"
161
- railway variables set QDRANT_PORT="$QDRANT_PORT"
162
  fi
163
 
164
- if [ -n "$OPENAI_API_KEY" ]; then
 
 
 
165
  railway variables set OPENAI_API_KEY="$OPENAI_API_KEY"
166
  railway variables set USE_OPENAI_INSTEAD_OF_OLLAMA="$USE_OPENAI_INSTEAD_OF_OLLAMA"
167
- else
168
- railway variables set OLLAMA_HOST="$OLLAMA_HOST"
169
- railway variables set OLLAMA_PORT="$OLLAMA_PORT"
170
- railway variables set OLLAMA_MODEL="$OLLAMA_MODEL"
171
  fi
172
 
173
- # Deploy backend
174
- log "Deploying backend code..."
175
- railway up --detach
176
-
177
  success "Backend deployment initiated"
178
  }
179
 
@@ -181,39 +180,26 @@ deploy_backend() {
181
  deploy_frontend() {
182
  log "Deploying frontend service..."
183
 
184
- # Get backend URL
185
- railway service use "$BACKEND_SERVICE"
186
- BACKEND_URL=$(railway domain | head -n1)
187
-
188
- if [ -z "$BACKEND_URL" ]; then
189
- warning "Backend URL not available yet. You may need to set VITE_API_BASE_URL manually later."
190
- BACKEND_URL="https://your-backend.railway.app"
191
- else
192
- BACKEND_URL="https://$BACKEND_URL"
193
- fi
194
 
195
  # Switch to frontend directory
196
  cd rag-quest-hub
197
 
198
- # Check if frontend service exists
199
- if ! railway service list | grep -q "$FRONTEND_SERVICE"; then
200
- log "Creating frontend service..."
201
- railway service create "$FRONTEND_SERVICE"
202
- fi
203
-
204
- # Switch to frontend service
205
- railway service use "$FRONTEND_SERVICE"
206
-
207
- # Set frontend environment variables
208
- log "Setting frontend environment variables..."
209
- railway variables set VITE_API_BASE_URL="$BACKEND_URL"
210
- railway variables set VITE_API_TIMEOUT="$VITE_API_TIMEOUT"
211
- railway variables set VITE_ENABLE_REGISTRATION="$VITE_ENABLE_REGISTRATION"
212
 
213
  # Deploy frontend
214
  log "Deploying frontend code..."
215
  railway up --detach
216
 
 
 
 
 
 
 
217
  # Return to project root
218
  cd ..
219
 
 
111
  case $choice in
112
  1)
113
  log "Creating new Railway project..."
114
+ if railway init --help | grep -q -- '--name'; then
115
+ railway init --name "$PROJECT_NAME"
116
+ else
117
+ railway init "$PROJECT_NAME"
118
+ fi
119
+
120
  ;;
121
  2)
122
  log "Connecting to existing Railway project..."
 
135
  deploy_backend() {
136
  log "Deploying backend service..."
137
 
138
+ # Railway automatically creates services when you deploy
139
+ # No need to explicitly create services anymore
140
+ log "Deploying backend service..."
 
 
141
 
142
+ # Deploy backend first
143
+ log "Deploying backend code..."
144
+ railway up --detach
145
 
146
+ # Set environment variables after deployment
147
  log "Setting backend environment variables..."
148
  source .env.railway
149
 
 
161
  railway variables set QDRANT_CLOUD_URL="$QDRANT_CLOUD_URL"
162
  railway variables set QDRANT_API_KEY="$QDRANT_API_KEY"
163
  else
164
+ railway variables set QDRANT_HOST="qdrant"
165
+ railway variables set QDRANT_PORT="6333"
166
  fi
167
 
168
+ if [ -n "$GEMINI_API_KEY" ]; then
169
+ railway variables set GEMINI_API_KEY="$GEMINI_API_KEY"
170
+ railway variables set USE_GEMINI_API="true"
171
+ elif [ -n "$OPENAI_API_KEY" ]; then
172
  railway variables set OPENAI_API_KEY="$OPENAI_API_KEY"
173
  railway variables set USE_OPENAI_INSTEAD_OF_OLLAMA="$USE_OPENAI_INSTEAD_OF_OLLAMA"
 
 
 
 
174
  fi
175
 
 
 
 
 
176
  success "Backend deployment initiated"
177
  }
178
 
 
180
  deploy_frontend() {
181
  log "Deploying frontend service..."
182
 
183
+ # Get backend URL from Railway dashboard or use placeholder
184
+ BACKEND_URL="https://your-backend.railway.app"
185
+ warning "You'll need to update VITE_API_BASE_URL with the actual backend URL after deployment"
 
 
 
 
 
 
 
186
 
187
  # Switch to frontend directory
188
  cd rag-quest-hub
189
 
190
+ # Railway will automatically create the service when deploying
191
+ log "Deploying frontend service..."
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  # Deploy frontend
194
  log "Deploying frontend code..."
195
  railway up --detach
196
 
197
+ # Set frontend environment variables after deployment
198
+ log "Setting frontend environment variables..."
199
+ railway variables set VITE_API_BASE_URL="$BACKEND_URL"
200
+ railway variables set VITE_API_TIMEOUT="30000"
201
+ railway variables set VITE_ENABLE_REGISTRATION="true"
202
+
203
  # Return to project root
204
  cd ..
205
 
deploy.sh DELETED
@@ -1,549 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Master Deployment Script for Knowledge Assistant RAG
4
- # This script provides an interactive interface to deploy to various platforms
5
-
6
- set -e # Exit on any error
7
-
8
- # Colors for output
9
- RED='\033[0;31m'
10
- GREEN='\033[0;32m'
11
- YELLOW='\033[1;33m'
12
- BLUE='\033[0;34m'
13
- CYAN='\033[0;36m'
14
- BOLD='\033[1m'
15
- NC='\033[0m' # No Color
16
-
17
- # Configuration
18
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
19
- VERSION="1.0.0"
20
-
21
- # Logging functions
22
- log() {
23
- echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"
24
- }
25
-
26
- error() {
27
- echo -e "${RED}[ERROR]${NC} $1" >&2
28
- }
29
-
30
- success() {
31
- echo -e "${GREEN}[SUCCESS]${NC} $1"
32
- }
33
-
34
- warning() {
35
- echo -e "${YELLOW}[WARNING]${NC} $1"
36
- }
37
-
38
- info() {
39
- echo -e "${CYAN}[INFO]${NC} $1"
40
- }
41
-
42
- # Display banner
43
- show_banner() {
44
- echo -e "${BOLD}${CYAN}"
45
- echo "╔══════════════════════════════════════════════════════════════╗"
46
- echo "║ Knowledge Assistant RAG ║"
47
- echo "║ Deployment Manager v${VERSION} ║"
48
- echo "║ ║"
49
- echo "║ Deploy your RAG application to multiple cloud platforms ║"
50
- echo "╚══════════════════════════════════════════════════════════════╝"
51
- echo -e "${NC}"
52
- }
53
-
54
- # Show help information
55
- show_help() {
56
- echo "Knowledge Assistant RAG Deployment Manager"
57
- echo ""
58
- echo "Usage: $0 [OPTIONS] [PLATFORM]"
59
- echo ""
60
- echo "Platforms:"
61
- echo " railway Deploy to Railway.app (free tier)"
62
- echo " fly Deploy to Fly.io (free tier)"
63
- echo " cloudrun Deploy to Google Cloud Run"
64
- echo " vercel Deploy to Vercel (hybrid deployment)"
65
- echo " local Deploy locally with Docker"
66
- echo ""
67
- echo "Options:"
68
- echo " -h, --help Show this help message"
69
- echo " -v, --version Show version information"
70
- echo " --validate-only Only validate environment and prerequisites"
71
- echo " --dry-run Show what would be deployed without executing"
72
- echo " --force Skip confirmation prompts"
73
- echo " --backend-only Deploy only backend services"
74
- echo " --frontend-only Deploy only frontend services"
75
- echo ""
76
- echo "Examples:"
77
- echo " $0 # Interactive platform selection"
78
- echo " $0 railway # Deploy to Railway"
79
- echo " $0 --validate-only # Check prerequisites only"
80
- echo " $0 cloudrun --dry-run # Show Cloud Run deployment plan"
81
- echo ""
82
- }
83
-
84
- # Show version information
85
- show_version() {
86
- echo "Knowledge Assistant RAG Deployment Manager v${VERSION}"
87
- echo "Copyright (c) 2024"
88
- }
89
-
90
- # Check system prerequisites
91
- check_system_prerequisites() {
92
- log "Checking system prerequisites..."
93
-
94
- local missing_tools=()
95
-
96
- # Check for required tools
97
- if ! command -v docker &> /dev/null; then
98
- missing_tools+=("docker")
99
- fi
100
-
101
- if ! command -v curl &> /dev/null; then
102
- missing_tools+=("curl")
103
- fi
104
-
105
- if ! command -v git &> /dev/null; then
106
- missing_tools+=("git")
107
- fi
108
-
109
- # Check Docker daemon
110
- if command -v docker &> /dev/null; then
111
- if ! docker info &> /dev/null; then
112
- error "Docker daemon is not running. Please start Docker."
113
- return 1
114
- fi
115
- fi
116
-
117
- if [ ${#missing_tools[@]} -ne 0 ]; then
118
- error "Missing required tools: ${missing_tools[*]}"
119
- echo "Please install the missing tools and try again."
120
- return 1
121
- fi
122
-
123
- success "System prerequisites check passed"
124
- return 0
125
- }
126
-
127
- # Validate project structure
128
- validate_project_structure() {
129
- log "Validating project structure..."
130
-
131
- local required_files=(
132
- "Dockerfile"
133
- "docker-compose.yml"
134
- "requirements.txt"
135
- "rag-quest-hub/package.json"
136
- "rag-quest-hub/Dockerfile"
137
- )
138
-
139
- local missing_files=()
140
-
141
- for file in "${required_files[@]}"; do
142
- if [ ! -f "$file" ]; then
143
- missing_files+=("$file")
144
- fi
145
- done
146
-
147
- if [ ${#missing_files[@]} -ne 0 ]; then
148
- error "Missing required files: ${missing_files[*]}"
149
- return 1
150
- fi
151
-
152
- success "Project structure validation passed"
153
- return 0
154
- }
155
-
156
- # Check platform-specific prerequisites
157
- check_platform_prerequisites() {
158
- local platform=$1
159
-
160
- case $platform in
161
- railway)
162
- if ! command -v railway &> /dev/null; then
163
- error "Railway CLI not found. Install with: npm install -g @railway/cli"
164
- return 1
165
- fi
166
- if ! railway whoami &> /dev/null; then
167
- error "Not authenticated with Railway. Run: railway login"
168
- return 1
169
- fi
170
- ;;
171
- fly)
172
- if ! command -v flyctl &> /dev/null; then
173
- error "Fly CLI not found. Install from: https://fly.io/docs/getting-started/installing-flyctl/"
174
- return 1
175
- fi
176
- if ! flyctl auth whoami &> /dev/null; then
177
- error "Not authenticated with Fly.io. Run: flyctl auth login"
178
- return 1
179
- fi
180
- ;;
181
- cloudrun)
182
- if ! command -v gcloud &> /dev/null; then
183
- error "Google Cloud CLI not found. Install from: https://cloud.google.com/sdk/docs/install"
184
- return 1
185
- fi
186
- if ! gcloud auth list --filter=status:ACTIVE --format="value(account)" | head -n1 &> /dev/null; then
187
- error "Not authenticated with Google Cloud. Run: gcloud auth login"
188
- return 1
189
- fi
190
- ;;
191
- vercel)
192
- if ! command -v vercel &> /dev/null; then
193
- error "Vercel CLI not found. Install with: npm install -g vercel"
194
- return 1
195
- fi
196
- if ! vercel whoami &> /dev/null; then
197
- error "Not authenticated with Vercel. Run: vercel login"
198
- return 1
199
- fi
200
- ;;
201
- local)
202
- # Local deployment only needs Docker
203
- ;;
204
- *)
205
- error "Unknown platform: $platform"
206
- return 1
207
- ;;
208
- esac
209
-
210
- success "Platform prerequisites for $platform are satisfied"
211
- return 0
212
- }
213
-
214
- # Validate environment configuration
215
- validate_environment() {
216
- local platform=$1
217
- log "Validating environment configuration for $platform..."
218
-
219
- local env_file=""
220
- case $platform in
221
- railway)
222
- env_file=".env.railway"
223
- ;;
224
- fly)
225
- env_file=".env.fly"
226
- ;;
227
- cloudrun)
228
- env_file=".env.cloudrun"
229
- ;;
230
- vercel)
231
- env_file=".env.vercel"
232
- ;;
233
- local)
234
- env_file=".env.production"
235
- ;;
236
- esac
237
-
238
- if [ ! -f "$env_file" ]; then
239
- warning "Environment file $env_file not found"
240
-
241
- local template_file="${env_file}.template"
242
- if [ -f "$template_file" ]; then
243
- info "Creating $env_file from template..."
244
- cp "$template_file" "$env_file"
245
- warning "Please edit $env_file with your configuration before continuing"
246
-
247
- if [ "$FORCE_DEPLOY" != "true" ]; then
248
- read -p "Press Enter after editing $env_file, or Ctrl+C to cancel..."
249
- fi
250
- else
251
- error "Template file $template_file not found"
252
- return 1
253
- fi
254
- fi
255
-
256
- # Source and validate environment variables
257
- source "$env_file"
258
-
259
- # Check JWT_SECRET
260
- if [ -z "$JWT_SECRET" ] || [[ "$JWT_SECRET" == *"change"* ]] || [[ "$JWT_SECRET" == *"your-"* ]]; then
261
- error "JWT_SECRET must be set to a secure value (32+ characters)"
262
- return 1
263
- fi
264
-
265
- if [ ${#JWT_SECRET} -lt 32 ]; then
266
- error "JWT_SECRET must be at least 32 characters long"
267
- return 1
268
- fi
269
-
270
- success "Environment configuration validated"
271
- return 0
272
- }
273
-
274
- # Show deployment plan
275
- show_deployment_plan() {
276
- local platform=$1
277
- local services=$2
278
-
279
- echo ""
280
- echo -e "${BOLD}Deployment Plan${NC}"
281
- echo "================"
282
- echo "Platform: $platform"
283
- echo "Services: $services"
284
- echo ""
285
-
286
- case $platform in
287
- railway)
288
- echo "Railway.app Deployment:"
289
- echo "• Backend: FastAPI application"
290
- echo "• Frontend: React/Vite application"
291
- echo "• Database: Railway PostgreSQL (optional)"
292
- echo "• Vector DB: Qdrant container"
293
- echo "• LLM: Google Gemini API"
294
- echo "• Resource limits: 512MB RAM, 1GB storage"
295
- ;;
296
- fly)
297
- echo "Fly.io Deployment:"
298
- echo "• Backend: FastAPI application"
299
- echo "• Frontend: React/Vite application"
300
- echo "• Database: SQLite with persistent volumes"
301
- echo "• Vector DB: Qdrant container"
302
- echo "• LLM: Google Gemini API"
303
- echo "• Resource limits: 256MB RAM, 1GB storage"
304
- ;;
305
- cloudrun)
306
- echo "Google Cloud Run Deployment:"
307
- echo "• Backend: FastAPI container"
308
- echo "• Frontend: React/Vite container"
309
- echo "• Database: Cloud SQL PostgreSQL"
310
- echo "• Vector DB: Qdrant container"
311
- echo "• LLM: Google Gemini API"
312
- echo "• Resource limits: 1GB memory, 2 vCPU"
313
- ;;
314
- vercel)
315
- echo "Vercel Hybrid Deployment:"
316
- echo "• Frontend: Static site on Vercel"
317
- echo "• Backend: Serverless functions on Vercel"
318
- echo "• Database: External managed service"
319
- echo "• Vector DB: Qdrant Cloud"
320
- echo "• LLM: Google Gemini API"
321
- ;;
322
- local)
323
- echo "Local Docker Deployment:"
324
- echo "• Backend: FastAPI container"
325
- echo "• Frontend: React/Vite container"
326
- echo "• Database: SQLite in volume"
327
- echo "• Vector DB: Qdrant container"
328
- echo "• LLM: Google Gemini API"
329
- ;;
330
- esac
331
- echo ""
332
- }
333
-
334
- # Interactive platform selection
335
- select_platform() {
336
- echo ""
337
- echo -e "${BOLD}Select Deployment Platform:${NC}"
338
- echo ""
339
- echo "1) Railway.app (Free tier: 512MB RAM, 1GB storage)"
340
- echo "2) Fly.io (Free tier: 256MB RAM, 1GB storage)"
341
- echo "3) Google Cloud Run (Free tier: 1GB memory, 2 vCPU)"
342
- echo "4) Vercel (Hybrid: Static frontend + serverless backend)"
343
- echo "5) Local Docker (Development/testing)"
344
- echo ""
345
-
346
- while true; do
347
- read -p "Enter your choice (1-5): " choice
348
- case $choice in
349
- 1) echo "railway"; return ;;
350
- 2) echo "fly"; return ;;
351
- 3) echo "cloudrun"; return ;;
352
- 4) echo "vercel"; return ;;
353
- 5) echo "local"; return ;;
354
- *) echo "Invalid choice. Please enter 1-5." ;;
355
- esac
356
- done
357
- }
358
-
359
- # Execute deployment
360
- execute_deployment() {
361
- local platform=$1
362
- local services=$2
363
-
364
- log "Starting deployment to $platform..."
365
-
366
- case $platform in
367
- railway)
368
- if [ "$services" = "backend-only" ]; then
369
- bash "$SCRIPT_DIR/deploy-railway.sh" --backend-only
370
- elif [ "$services" = "frontend-only" ]; then
371
- bash "$SCRIPT_DIR/deploy-railway.sh" --frontend-only
372
- else
373
- bash "$SCRIPT_DIR/deploy-railway.sh"
374
- fi
375
- ;;
376
- fly)
377
- # Fly.io deployment would be implemented here
378
- error "Fly.io deployment not yet implemented"
379
- return 1
380
- ;;
381
- cloudrun)
382
- bash "$SCRIPT_DIR/deploy-cloudrun.sh"
383
- ;;
384
- vercel)
385
- # Vercel deployment would be implemented here
386
- error "Vercel deployment not yet implemented"
387
- return 1
388
- ;;
389
- local)
390
- bash "$SCRIPT_DIR/deploy-production.sh"
391
- ;;
392
- *)
393
- error "Unknown platform: $platform"
394
- return 1
395
- ;;
396
- esac
397
- }
398
-
399
- # Rollback deployment
400
- rollback_deployment() {
401
- local platform=$1
402
-
403
- warning "Rolling back deployment on $platform..."
404
-
405
- case $platform in
406
- railway)
407
- railway service list | grep -E "(backend|frontend)" | while read -r service; do
408
- service_name=$(echo "$service" | awk '{print $1}')
409
- warning "Rolling back $service_name..."
410
- railway rollback --service "$service_name" || true
411
- done
412
- ;;
413
- cloudrun)
414
- warning "Cloud Run rollback requires manual intervention via Google Cloud Console"
415
- ;;
416
- local)
417
- docker-compose -f docker-compose.prod.yml down || true
418
- ;;
419
- *)
420
- warning "Rollback not implemented for $platform"
421
- ;;
422
- esac
423
- }
424
-
425
- # Main deployment function
426
- main() {
427
- local platform=""
428
- local services="all"
429
- local validate_only=false
430
- local dry_run=false
431
-
432
- # Parse command line arguments
433
- while [[ $# -gt 0 ]]; do
434
- case $1 in
435
- -h|--help)
436
- show_help
437
- exit 0
438
- ;;
439
- -v|--version)
440
- show_version
441
- exit 0
442
- ;;
443
- --validate-only)
444
- validate_only=true
445
- shift
446
- ;;
447
- --dry-run)
448
- dry_run=true
449
- shift
450
- ;;
451
- --force)
452
- FORCE_DEPLOY=true
453
- shift
454
- ;;
455
- --backend-only)
456
- services="backend-only"
457
- shift
458
- ;;
459
- --frontend-only)
460
- services="frontend-only"
461
- shift
462
- ;;
463
- railway|fly|cloudrun|vercel|local)
464
- platform=$1
465
- shift
466
- ;;
467
- *)
468
- error "Unknown option: $1"
469
- show_help
470
- exit 1
471
- ;;
472
- esac
473
- done
474
-
475
- # Show banner
476
- show_banner
477
-
478
- # Check system prerequisites
479
- if ! check_system_prerequisites; then
480
- exit 1
481
- fi
482
-
483
- # Validate project structure
484
- if ! validate_project_structure; then
485
- exit 1
486
- fi
487
-
488
- # Select platform if not provided
489
- if [ -z "$platform" ]; then
490
- platform=$(select_platform)
491
- fi
492
-
493
- # Check platform prerequisites
494
- if ! check_platform_prerequisites "$platform"; then
495
- exit 1
496
- fi
497
-
498
- # Validate environment
499
- if ! validate_environment "$platform"; then
500
- exit 1
501
- fi
502
-
503
- # Show deployment plan
504
- show_deployment_plan "$platform" "$services"
505
-
506
- # Exit if validate-only
507
- if [ "$validate_only" = true ]; then
508
- success "Validation completed successfully"
509
- exit 0
510
- fi
511
-
512
- # Exit if dry-run
513
- if [ "$dry_run" = true ]; then
514
- info "Dry run completed - no deployment executed"
515
- exit 0
516
- fi
517
-
518
- # Confirm deployment
519
- if [ "$FORCE_DEPLOY" != "true" ]; then
520
- echo -n "Proceed with deployment? (y/N): "
521
- read -r confirm
522
- if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
523
- info "Deployment cancelled"
524
- exit 0
525
- fi
526
- fi
527
-
528
- # Execute deployment with error handling
529
- if ! execute_deployment "$platform" "$services"; then
530
- error "Deployment failed"
531
-
532
- if [ "$FORCE_DEPLOY" != "true" ]; then
533
- echo -n "Attempt rollback? (y/N): "
534
- read -r rollback_confirm
535
- if [[ "$rollback_confirm" =~ ^[Yy]$ ]]; then
536
- rollback_deployment "$platform"
537
- fi
538
- fi
539
-
540
- exit 1
541
- fi
542
-
543
- success "Deployment completed successfully!"
544
- }
545
-
546
- # Handle script execution
547
- if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
548
- main "$@"
549
- fi
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main-simple.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ import os
4
+ from datetime import datetime
5
+
6
+ app = FastAPI(
7
+ title="Knowledge Assistant RAG API",
8
+ description="API for document upload and knowledge base querying",
9
+ version="1.0.0"
10
+ )
11
+
12
+ # Configure CORS
13
+ cors_origins = os.getenv("CORS_ORIGINS", "*").split(",")
14
+ app.add_middleware(
15
+ CORSMiddleware,
16
+ allow_origins=cors_origins,
17
+ allow_credentials=True,
18
+ allow_methods=["*"],
19
+ allow_headers=["*"],
20
+ )
21
+
22
+ @app.get("/")
23
+ async def root():
24
+ """Root endpoint"""
25
+ return {
26
+ "message": "Knowledge Assistant RAG API",
27
+ "status": "running",
28
+ "timestamp": datetime.utcnow().isoformat()
29
+ }
30
+
31
+ @app.get("/health")
32
+ async def health_check():
33
+ """Simple health check endpoint"""
34
+ return {
35
+ "status": "ok",
36
+ "timestamp": datetime.utcnow().isoformat(),
37
+ "service": "knowledge-assistant-api"
38
+ }
39
+
40
+ @app.get("/health/simple")
41
+ async def simple_health_check():
42
+ """Simple health check endpoint for basic monitoring."""
43
+ return {
44
+ "status": "ok",
45
+ "timestamp": datetime.utcnow().isoformat(),
46
+ "service": "knowledge-assistant-api"
47
+ }
planning.md DELETED
@@ -1,116 +0,0 @@
1
- Build a Knowledge Assistant that allows users to upload documents (PDFs, text), indexes them, and answers queries based on document content using **RAG (Retrieval-Augmented Generation)** — all hosted on your own infrastructure.
2
-
3
- ---
4
-
5
- ## **Stack (All Open-Source):**
6
-
7
- | Component | Tool |
8
- | --- | --- |
9
- | Backend API | **FastAPI** |
10
- | Document Parsing | **PyMuPDF**, **pdfminer**, **BeautifulSoup** |
11
- | Embedding Models | **sentence-transformers (BERT-based models)** |
12
- | Vector Database | **Qdrant** (or **Weaviate**) |
13
- | RAG Orchestration | **LangChain** or **LlamaIndex** |
14
- | LLM (for generation) | **Open Source Models via Ollama** (e.g., **LLaMA 3**, **Mistral 7B**, **Phi-3** using **llama.cpp** backend) |
15
- | Frontend | Minimal HTML/JS or **React** (optional) |
16
- | Deployment | Docker Compose / K8s (Optional) |
17
- | Authentication | Simple JWT with **FastAPI Users** |
18
- | Model Serving | **llama.cpp**, **Ollama**, **vLLM** |
19
-
20
- ---
21
-
22
- ## **Full Roadmap:**
23
-
24
- ---
25
-
26
- ### **Phase 1 — Research & Setup (Week 1-2)**
27
-
28
- - [ ] Finalize stack: FastAPI, Qdrant, LangChain, Ollama, Llama.cpp.
29
- - [ ] Setup local dev environment.
30
- - [ ] Install & run Qdrant locally via Docker.
31
- - [ ] Install LangChain & try basic document loading examples.
32
- - [ ] Setup Ollama with LLaMA or Mistral models.
33
- - [ ] Explore sentence-transformers for embeddings.
34
-
35
- ---
36
-
37
- ### **Phase 2 — Backend API MVP (Week 3-4)**
38
-
39
- - [ ] Implement FastAPI endpoints:
40
- - [ ] `/upload` — Upload document.
41
- - [ ] `/query` — Query endpoint.
42
- - [ ] `/health` — Health check.
43
- - [ ] Use PyMuPDF for PDF parsing & text extraction.
44
- - [ ] Chunk documents (LangChain text splitter).
45
- - [ ] Generate embeddings (sentence-transformers).
46
- - [ ] Store chunks + metadata in Qdrant.
47
- - [ ] Implement query logic:
48
- - [ ] Accept query → Embed → Retrieve top-k from Qdrant.
49
- - [ ] Format context for LLM prompt.
50
- - [ ] Call LLM via Ollama/llama.cpp.
51
- - [ ] Return answer.
52
-
53
- ---
54
-
55
- ### **Phase 3 — Frontend & Integration (Week 5)**
56
-
57
- - [ ] Build minimal frontend (React or plain HTML) for:
58
- - Uploading documents.
59
- - Query input/output.
60
- - [ ] WebSocket (optional) for live querying.
61
- - [ ] Implement Authentication (JWT via FastAPI Users).
62
-
63
- ---
64
-
65
- ### **Phase 4 — Deployment & Scaling (Week 6)**
66
-
67
- - [ ] Dockerize FastAPI + Qdrant.
68
- - [ ] Deploy Ollama with LLM models.
69
- - [ ] Optional: Deploy on VPS or self-hosted server.
70
- - [ ] Setup simple monitoring (Prometheus/Grafana optional).
71
- - [ ] Write Deployment Guide.
72
-
73
- ---
74
-
75
- ### **Phase 5 — Polish & Documentation (Week 7)**
76
-
77
- - [ ] Write API Docs (Swagger/OpenAPI via FastAPI).
78
- - [ ] Create GitHub README with:
79
- - Architecture Diagram.
80
- - Setup Instructions.
81
- - Sample Queries.
82
- - [ ] Write a Medium/LinkedIn Post explaining your build.
83
- - [ ] Record demo video.
84
-
85
- ---
86
-
87
- ## **Architecture Overview:**
88
-
89
- ```
90
- [Frontend] --> [FastAPI Backend] --> [Qdrant for Retrieval]
91
- --> [Ollama/llama.cpp for LLM]
92
-
93
- ```
94
-
95
- ---
96
-
97
- ## **Sample Features List for MVP:**
98
-
99
- - Document upload with parsing & chunking.
100
- - Query API with context retrieval & generation.
101
- - Local embedding + storage using vector DB.
102
- - Open-source LLM serving locally.
103
- - API secured with simple JWT.
104
-
105
- ---
106
-
107
- ## **Optional Advanced Features (Post-MVP):**
108
-
109
- - Multi-user support with auth.
110
- - Document categories/tags.
111
- - Query history & logging.
112
- - LLM selection on runtime (switch models).
113
- - LangChain Agent support.
114
- - Frontend with context preview.
115
-
116
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/.env.vercel CHANGED
@@ -1,5 +1,5 @@
1
  # Vercel deployment environment configuration
2
- VITE_API_BASE_URL=/api
3
  VITE_API_TIMEOUT=30000
4
  VITE_QUERY_TIMEOUT=60000
5
 
 
1
  # Vercel deployment environment configuration
2
+ VITE_API_BASE_URL=https://knowledge-assistant-rag-production.up.railway.app
3
  VITE_API_TIMEOUT=30000
4
  VITE_QUERY_TIMEOUT=60000
5
 
rag-quest-hub/.gitignore CHANGED
@@ -22,3 +22,141 @@ dist-ssr
22
  *.njsproj
23
  *.sln
24
  *.sw?
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  *.njsproj
23
  *.sln
24
  *.sw?
25
+ .vercel
26
+ # ===========================================
27
+ # FRONTEND SECURITY & DEPLOYMENT
28
+ # ===========================================
29
+
30
+ # Environment files with API keys (CRITICAL)
31
+ .env
32
+ .env.local
33
+ .env.development.local
34
+ .env.test.local
35
+ .env.production.local
36
+ .env.vercel
37
+ .env.vercel.local
38
+ .env.*
39
+ !.env.example
40
+
41
+ # Vercel deployment files
42
+ .vercel/
43
+ .vercel
44
+
45
+ # Build artifacts
46
+ dist/
47
+ build/
48
+ out/
49
+ .next/
50
+ .nuxt/
51
+
52
+ # ===========================================
53
+ # DEVELOPMENT & TESTING
54
+ # ===========================================
55
+
56
+ # Test coverage
57
+ coverage/
58
+ .nyc_output/
59
+ test-results/
60
+ playwright-report/
61
+ test-results/
62
+
63
+ # Storybook
64
+ .storybook-out/
65
+ storybook-static/
66
+
67
+ # ===========================================
68
+ # CACHE & TEMPORARY FILES
69
+ # ===========================================
70
+
71
+ # Vite cache
72
+ .vite/
73
+ vite.config.js.timestamp-*
74
+
75
+ # TypeScript cache
76
+ *.tsbuildinfo
77
+
78
+ # ESLint cache
79
+ .eslintcache
80
+
81
+ # Prettier cache
82
+ .prettiercache
83
+
84
+ # Parcel cache
85
+ .parcel-cache/
86
+
87
+ # ===========================================
88
+ # PACKAGE MANAGERS
89
+ # ===========================================
90
+
91
+ # npm
92
+ node_modules/
93
+ npm-debug.log*
94
+ .npm/
95
+
96
+ # Yarn
97
+ yarn-debug.log*
98
+ yarn-error.log*
99
+ .yarn/
100
+ .pnp.*
101
+
102
+ # pnpm
103
+ pnpm-debug.log*
104
+ .pnpm-store/
105
+
106
+ # ===========================================
107
+ # SYSTEM FILES
108
+ # ===========================================
109
+
110
+ # macOS
111
+ .DS_Store
112
+ .AppleDouble
113
+ .LSOverride
114
+
115
+ # Windows
116
+ Thumbs.db
117
+ ehthumbs.db
118
+ Desktop.ini
119
+
120
+ # Linux
121
+ *~
122
+
123
+ # ===========================================
124
+ # IDE & EDITOR
125
+ # ===========================================
126
+
127
+ # VSCode
128
+ .vscode/settings.json
129
+ .vscode/launch.json
130
+ !.vscode/extensions.json
131
+
132
+ # WebStorm/IntelliJ
133
+ .idea/
134
+ *.iml
135
+
136
+ # Vim
137
+ *.swp
138
+ *.swo
139
+
140
+ # ===========================================
141
+ # RUNTIME & LOGS
142
+ # ===========================================
143
+
144
+ # Runtime data
145
+ pids/
146
+ *.pid
147
+ *.seed
148
+ *.pid.lock
149
+
150
+ # Logs
151
+ logs/
152
+ *.log
153
+
154
+ # ===========================================
155
+ # SECURITY
156
+ # ===========================================
157
+
158
+ # API keys and secrets (if accidentally created)
159
+ *api_key*
160
+ *secret*
161
+ *token*
162
+ *.key
rag-quest-hub/api/auth/jwt/login.js DELETED
@@ -1,101 +0,0 @@
1
- import jwt from 'jsonwebtoken';
2
- import bcrypt from 'bcryptjs';
3
- import { getDatabase } from '../../lib/database.js';
4
-
5
- const JWT_SECRET = process.env.JWT_SECRET || 'your-super-secret-jwt-key-here';
6
- const JWT_LIFETIME_SECONDS = parseInt(process.env.JWT_LIFETIME_SECONDS || '3600');
7
-
8
- export default async function handler(req, res) {
9
- // Set CORS headers
10
- res.setHeader('Access-Control-Allow-Origin', '*');
11
- res.setHeader('Access-Control-Allow-Methods', 'POST, OPTIONS');
12
- res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
13
-
14
- if (req.method === 'OPTIONS') {
15
- return res.status(200).end();
16
- }
17
-
18
- if (req.method !== 'POST') {
19
- return res.status(405).json({
20
- error: 'MethodNotAllowed',
21
- detail: 'Method not allowed',
22
- status_code: 405,
23
- timestamp: new Date().toISOString()
24
- });
25
- }
26
-
27
- try {
28
- const { username, password } = req.body;
29
-
30
- if (!username || !password) {
31
- return res.status(422).json({
32
- error: 'ValidationError',
33
- detail: 'Username and password are required',
34
- status_code: 422,
35
- timestamp: new Date().toISOString()
36
- });
37
- }
38
-
39
- const db = await getDatabase();
40
-
41
- // Find user by email
42
- const user = await db.get(
43
- 'SELECT id, email, hashed_password, is_active, is_superuser, is_verified, created_at, updated_at FROM users WHERE email = ?',
44
- [username]
45
- );
46
-
47
- if (!user) {
48
- return res.status(400).json({
49
- error: 'InvalidCredentialsError',
50
- detail: 'Invalid email or password',
51
- status_code: 400,
52
- timestamp: new Date().toISOString(),
53
- auth_required: true
54
- });
55
- }
56
-
57
- // Verify password
58
- const isValidPassword = await bcrypt.compare(password, user.hashed_password);
59
- if (!isValidPassword) {
60
- return res.status(400).json({
61
- error: 'InvalidCredentialsError',
62
- detail: 'Invalid email or password',
63
- status_code: 400,
64
- timestamp: new Date().toISOString(),
65
- auth_required: true
66
- });
67
- }
68
-
69
- // Check if user is active
70
- if (!user.is_active) {
71
- return res.status(400).json({
72
- error: 'InactiveUserError',
73
- detail: 'User account is inactive',
74
- status_code: 400,
75
- timestamp: new Date().toISOString(),
76
- auth_required: true
77
- });
78
- }
79
-
80
- // Generate JWT token
81
- const token = jwt.sign(
82
- { sub: user.id, email: user.email },
83
- JWT_SECRET,
84
- { expiresIn: JWT_LIFETIME_SECONDS }
85
- );
86
-
87
- return res.status(200).json({
88
- access_token: token,
89
- token_type: 'bearer'
90
- });
91
-
92
- } catch (error) {
93
- console.error('Login error:', error);
94
- return res.status(500).json({
95
- error: 'InternalServerError',
96
- detail: 'An unexpected error occurred during login',
97
- status_code: 500,
98
- timestamp: new Date().toISOString()
99
- });
100
- }
101
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/api/auth/register.js DELETED
@@ -1,99 +0,0 @@
1
- import { createHash } from 'crypto';
2
- import { v4 as uuidv4 } from 'uuid';
3
- import jwt from 'jsonwebtoken';
4
- import bcrypt from 'bcryptjs';
5
- import { getDatabase } from '../lib/database.js';
6
-
7
- const JWT_SECRET = process.env.JWT_SECRET || 'your-super-secret-jwt-key-here';
8
- const JWT_LIFETIME_SECONDS = parseInt(process.env.JWT_LIFETIME_SECONDS || '3600');
9
-
10
- export default async function handler(req, res) {
11
- // Set CORS headers
12
- res.setHeader('Access-Control-Allow-Origin', '*');
13
- res.setHeader('Access-Control-Allow-Methods', 'POST, OPTIONS');
14
- res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
15
-
16
- if (req.method === 'OPTIONS') {
17
- return res.status(200).end();
18
- }
19
-
20
- if (req.method !== 'POST') {
21
- return res.status(405).json({
22
- error: 'MethodNotAllowed',
23
- detail: 'Method not allowed',
24
- status_code: 405,
25
- timestamp: new Date().toISOString()
26
- });
27
- }
28
-
29
- try {
30
- const { email, password } = req.body;
31
-
32
- if (!email || !password) {
33
- return res.status(422).json({
34
- error: 'ValidationError',
35
- detail: 'Email and password are required',
36
- status_code: 422,
37
- timestamp: new Date().toISOString()
38
- });
39
- }
40
-
41
- const db = await getDatabase();
42
-
43
- // Check if user already exists
44
- const existingUser = await db.get(
45
- 'SELECT id FROM users WHERE email = ?',
46
- [email]
47
- );
48
-
49
- if (existingUser) {
50
- return res.status(400).json({
51
- error: 'UserAlreadyExistsError',
52
- detail: `User with email ${email} already exists`,
53
- status_code: 400,
54
- timestamp: new Date().toISOString(),
55
- registration_error: true
56
- });
57
- }
58
-
59
- // Hash password
60
- const hashedPassword = await bcrypt.hash(password, 12);
61
- const userId = uuidv4();
62
- const now = new Date().toISOString();
63
-
64
- // Create user
65
- await db.run(
66
- `INSERT INTO users (id, email, hashed_password, is_active, is_superuser, is_verified, created_at, updated_at)
67
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
68
- [userId, email, hashedPassword, 1, 0, 0, now, now]
69
- );
70
-
71
- // Generate JWT token
72
- const token = jwt.sign(
73
- { sub: userId, email: email },
74
- JWT_SECRET,
75
- { expiresIn: JWT_LIFETIME_SECONDS }
76
- );
77
-
78
- return res.status(201).json({
79
- id: userId,
80
- email: email,
81
- is_active: true,
82
- is_superuser: false,
83
- is_verified: false,
84
- created_at: now,
85
- updated_at: now,
86
- access_token: token,
87
- token_type: 'bearer'
88
- });
89
-
90
- } catch (error) {
91
- console.error('Registration error:', error);
92
- return res.status(500).json({
93
- error: 'InternalServerError',
94
- detail: 'An unexpected error occurred during registration',
95
- status_code: 500,
96
- timestamp: new Date().toISOString()
97
- });
98
- }
99
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/api/health.js DELETED
@@ -1,191 +0,0 @@
1
- export default async function handler(req, res) {
2
- // Set CORS headers
3
- res.setHeader('Access-Control-Allow-Origin', '*');
4
- res.setHeader('Access-Control-Allow-Methods', 'GET, OPTIONS');
5
- res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
6
-
7
- if (req.method === 'OPTIONS') {
8
- return res.status(200).end();
9
- }
10
-
11
- if (req.method !== 'GET') {
12
- return res.status(405).json({
13
- error: 'MethodNotAllowed',
14
- detail: 'Method not allowed',
15
- status_code: 405,
16
- timestamp: new Date().toISOString()
17
- });
18
- }
19
-
20
- const startTime = Date.now();
21
- const healthStatus = {
22
- status: 'ok',
23
- timestamp: new Date().toISOString(),
24
- services: {},
25
- system_metrics: {
26
- response_time_ms: 0,
27
- timestamp: new Date().toISOString()
28
- },
29
- alerts: [],
30
- summary: {
31
- total_services: 0,
32
- healthy_services: 0,
33
- degraded_services: 0,
34
- unhealthy_services: 0
35
- }
36
- };
37
-
38
- const services = [];
39
-
40
- // Check database connection
41
- try {
42
- const dbStartTime = Date.now();
43
- const { getDatabase } = await import('./lib/database.js');
44
- const db = await getDatabase();
45
- await db.get('SELECT 1');
46
-
47
- // Get basic stats
48
- const userCount = await db.get('SELECT COUNT(*) as count FROM users');
49
- const docCount = await db.get('SELECT COUNT(*) as count FROM documents');
50
-
51
- const dbResponseTime = Date.now() - dbStartTime;
52
-
53
- healthStatus.services.database = {
54
- status: 'healthy',
55
- response_time_ms: dbResponseTime,
56
- metadata: {
57
- type: 'sqlite',
58
- user_count: userCount?.count || 0,
59
- document_count: docCount?.count || 0
60
- },
61
- last_check: new Date().toISOString()
62
- };
63
- services.push('healthy');
64
- } catch (error) {
65
- console.error('Database health check failed:', error);
66
- healthStatus.services.database = {
67
- status: 'unhealthy',
68
- error_message: error.message,
69
- last_check: new Date().toISOString()
70
- };
71
- healthStatus.status = 'degraded';
72
- services.push('unhealthy');
73
- }
74
-
75
- // Check Qdrant connection
76
- try {
77
- const qdrantStartTime = Date.now();
78
- const { getQdrantClient } = await import('./lib/qdrant.js');
79
- const qdrantClient = getQdrantClient();
80
- const collections = await qdrantClient.getCollections();
81
- const qdrantResponseTime = Date.now() - qdrantStartTime;
82
-
83
- healthStatus.services.qdrant = {
84
- status: 'healthy',
85
- response_time_ms: qdrantResponseTime,
86
- metadata: {
87
- collections_count: collections.collections?.length || 0,
88
- collections: collections.collections?.map(c => c.name) || []
89
- },
90
- last_check: new Date().toISOString()
91
- };
92
- services.push('healthy');
93
- } catch (error) {
94
- console.error('Qdrant health check failed:', error);
95
- healthStatus.services.qdrant = {
96
- status: 'unhealthy',
97
- error_message: error.message,
98
- last_check: new Date().toISOString()
99
- };
100
- healthStatus.status = 'degraded';
101
- services.push('unhealthy');
102
- }
103
-
104
- // Check Gemini API
105
- try {
106
- const geminiStartTime = Date.now();
107
- const { generateResponse } = await import('./lib/gemini.js');
108
- const testResponse = await generateResponse('Hello, respond with OK if working.');
109
- const geminiResponseTime = Date.now() - geminiStartTime;
110
-
111
- healthStatus.services.gemini = {
112
- status: 'healthy',
113
- response_time_ms: geminiResponseTime,
114
- metadata: {
115
- model: 'gemini-pro',
116
- test_response_length: testResponse?.length || 0
117
- },
118
- last_check: new Date().toISOString()
119
- };
120
- services.push('healthy');
121
- } catch (error) {
122
- console.error('Gemini health check failed:', error);
123
- healthStatus.services.gemini = {
124
- status: 'unhealthy',
125
- error_message: error.message,
126
- last_check: new Date().toISOString()
127
- };
128
- healthStatus.status = 'degraded';
129
- services.push('unhealthy');
130
- }
131
-
132
- // Check OpenAI embeddings
133
- try {
134
- const embeddingStartTime = Date.now();
135
- const { generateEmbeddings } = await import('./lib/embeddings.js');
136
- const testEmbedding = await generateEmbeddings('test health check');
137
- const embeddingResponseTime = Date.now() - embeddingStartTime;
138
-
139
- healthStatus.services.embeddings = {
140
- status: 'healthy',
141
- response_time_ms: embeddingResponseTime,
142
- metadata: {
143
- model: 'text-embedding-ada-002',
144
- embedding_dimension: testEmbedding?.length || 0
145
- },
146
- last_check: new Date().toISOString()
147
- };
148
- services.push('healthy');
149
- } catch (error) {
150
- console.error('Embeddings health check failed:', error);
151
- healthStatus.services.embeddings = {
152
- status: 'unhealthy',
153
- error_message: error.message,
154
- last_check: new Date().toISOString()
155
- };
156
- healthStatus.status = 'degraded';
157
- services.push('unhealthy');
158
- }
159
-
160
- // Calculate overall response time
161
- healthStatus.system_metrics.response_time_ms = Date.now() - startTime;
162
-
163
- // Calculate summary
164
- healthStatus.summary.total_services = services.length;
165
- healthStatus.summary.healthy_services = services.filter(s => s === 'healthy').length;
166
- healthStatus.summary.unhealthy_services = services.filter(s => s === 'unhealthy').length;
167
- healthStatus.summary.degraded_services = services.filter(s => s === 'degraded').length;
168
-
169
- // Check for performance alerts
170
- const responseTimeThreshold = 5000; // 5 seconds
171
- if (healthStatus.system_metrics.response_time_ms > responseTimeThreshold) {
172
- healthStatus.alerts.push({
173
- type: 'high_response_time',
174
- severity: 'warning',
175
- message: `Health check response time is ${healthStatus.system_metrics.response_time_ms}ms (threshold: ${responseTimeThreshold}ms)`,
176
- value: healthStatus.system_metrics.response_time_ms,
177
- threshold: responseTimeThreshold
178
- });
179
- }
180
-
181
- // Set overall status based on service health
182
- if (healthStatus.summary.unhealthy_services > 0) {
183
- healthStatus.status = 'unhealthy';
184
- } else if (healthStatus.summary.degraded_services > 0) {
185
- healthStatus.status = 'degraded';
186
- } else {
187
- healthStatus.status = 'healthy';
188
- }
189
-
190
- return res.status(200).json(healthStatus);
191
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/api/package.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "name": "knowledge-assistant-api",
3
- "version": "1.0.0",
4
- "description": "Serverless API functions for Knowledge Assistant RAG",
5
- "type": "module",
6
- "dependencies": {
7
- "@google/generative-ai": "^0.2.1",
8
- "bcryptjs": "^2.4.3",
9
- "formidable": "^3.5.1",
10
- "jsonwebtoken": "^9.0.2",
11
- "sqlite": "^5.1.1",
12
- "sqlite3": "^5.1.6",
13
- "uuid": "^9.0.1"
14
- }
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/api/query.js DELETED
@@ -1,142 +0,0 @@
1
- import { requireAuth } from './lib/auth.js';
2
- import { getDatabase } from './lib/database.js';
3
- import { generateEmbeddings } from './lib/embeddings.js';
4
- import { getQdrantClient, getUserCollectionName } from './lib/qdrant.js';
5
- import { generateResponse, formatPrompt } from './lib/gemini.js';
6
-
7
- async function queryHandler(req, res) {
8
- // Set CORS headers
9
- res.setHeader('Access-Control-Allow-Origin', '*');
10
- res.setHeader('Access-Control-Allow-Methods', 'POST, OPTIONS');
11
- res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
12
-
13
- if (req.method === 'OPTIONS') {
14
- return res.status(200).end();
15
- }
16
-
17
- if (req.method !== 'POST') {
18
- return res.status(405).json({
19
- error: 'MethodNotAllowed',
20
- detail: 'Method not allowed',
21
- status_code: 405,
22
- timestamp: new Date().toISOString()
23
- });
24
- }
25
-
26
- try {
27
- const user = req.user;
28
- const { query } = req.body;
29
-
30
- if (!query || !query.trim()) {
31
- return res.status(422).json({
32
- error: 'ValidationError',
33
- detail: 'Query is required',
34
- status_code: 422,
35
- timestamp: new Date().toISOString()
36
- });
37
- }
38
-
39
- // Generate query embedding
40
- const queryEmbedding = await generateEmbeddings(query);
41
-
42
- // Get user's collection name
43
- const collectionName = getUserCollectionName(user.id);
44
-
45
- // Search for relevant documents in user's collection
46
- const qdrantClient = getQdrantClient();
47
- let searchResults = [];
48
-
49
- try {
50
- searchResults = await qdrantClient.searchVectors(collectionName, queryEmbedding, 3);
51
- } catch (error) {
52
- // Collection might not exist if user hasn't uploaded any documents
53
- if (error.message.includes('not found') || error.message.includes('does not exist')) {
54
- searchResults = [];
55
- } else {
56
- throw error;
57
- }
58
- }
59
-
60
- // Check if any results were found
61
- if (!searchResults || searchResults.length === 0) {
62
- // Check if user has any documents at all
63
- const db = await getDatabase();
64
- const docCount = await db.get(
65
- 'SELECT COUNT(*) as count FROM document_metadata WHERE user_id = ?',
66
- [user.id]
67
- );
68
-
69
- let message;
70
- if (docCount.count === 0) {
71
- message = "You haven't uploaded any documents yet. Please upload some documents to build your knowledge base before asking questions.";
72
- } else {
73
- message = "I couldn't find any relevant information in your knowledge base to answer your question. Please try rephrasing your query or upload more relevant documents.";
74
- }
75
-
76
- return res.status(200).json({
77
- answer: message,
78
- source_documents: []
79
- });
80
- }
81
-
82
- // Filter results to ensure they belong to the user (additional security check)
83
- const filteredResults = searchResults.filter(result =>
84
- result.payload && result.payload.user_id === user.id
85
- );
86
-
87
- if (filteredResults.length === 0) {
88
- return res.status(200).json({
89
- answer: "I couldn't find any relevant information in your personal knowledge base to answer your question. Please try rephrasing your query or upload more relevant documents.",
90
- source_documents: []
91
- });
92
- }
93
-
94
- // Format the prompt for the LLM
95
- const prompt = formatPrompt(query, filteredResults);
96
-
97
- // Generate a response from Gemini
98
- const answer = await generateResponse(prompt);
99
-
100
- // Extract source documents for citation
101
- const sourceDocuments = filteredResults.map(result => ({
102
- source: result.payload?.source || 'Unknown',
103
- text: result.payload?.text?.substring(0, 500) + (result.payload?.text?.length > 500 ? '...' : '') || 'N/A',
104
- score: result.score || 0.0
105
- }));
106
-
107
- return res.status(200).json({
108
- answer: answer,
109
- source_documents: sourceDocuments
110
- });
111
-
112
- } catch (error) {
113
- console.error('Query error:', error);
114
-
115
- if (error.message.includes('GEMINI_API_KEY')) {
116
- return res.status(503).json({
117
- error: 'ServiceUnavailableError',
118
- detail: 'LLM service is not configured properly',
119
- status_code: 503,
120
- timestamp: new Date().toISOString()
121
- });
122
- }
123
-
124
- if (error.message.includes('OPENAI_API_KEY')) {
125
- return res.status(503).json({
126
- error: 'ServiceUnavailableError',
127
- detail: 'Embedding service is not configured properly',
128
- status_code: 503,
129
- timestamp: new Date().toISOString()
130
- });
131
- }
132
-
133
- return res.status(500).json({
134
- error: 'InternalServerError',
135
- detail: 'An unexpected error occurred during query processing',
136
- status_code: 500,
137
- timestamp: new Date().toISOString()
138
- });
139
- }
140
- }
141
-
142
- export default requireAuth(queryHandler);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/api/upload.js DELETED
@@ -1,171 +0,0 @@
1
- import { requireAuth } from './lib/auth.js';
2
- import { getDatabase } from './lib/database.js';
3
- import { generateEmbeddings, getEmbeddingDimension } from './lib/embeddings.js';
4
- import { getQdrantClient, ensureUserCollectionExists } from './lib/qdrant.js';
5
- import { chunkText, calculateFileHash, parseDocument, validateFileType, validateFileSize } from './lib/processing.js';
6
- import { v4 as uuidv4 } from 'uuid';
7
- import formidable from 'formidable';
8
- import fs from 'fs';
9
-
10
- export const config = {
11
- api: {
12
- bodyParser: false,
13
- },
14
- };
15
-
16
- async function uploadHandler(req, res) {
17
- // Set CORS headers
18
- res.setHeader('Access-Control-Allow-Origin', '*');
19
- res.setHeader('Access-Control-Allow-Methods', 'POST, OPTIONS');
20
- res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
21
-
22
- if (req.method === 'OPTIONS') {
23
- return res.status(200).end();
24
- }
25
-
26
- if (req.method !== 'POST') {
27
- return res.status(405).json({
28
- error: 'MethodNotAllowed',
29
- detail: 'Method not allowed',
30
- status_code: 405,
31
- timestamp: new Date().toISOString()
32
- });
33
- }
34
-
35
- try {
36
- const user = req.user;
37
-
38
- // Parse form data
39
- const form = formidable({
40
- maxFileSize: 10 * 1024 * 1024, // 10MB limit
41
- keepExtensions: true,
42
- });
43
-
44
- const [fields, files] = await form.parse(req);
45
- const file = files.file?.[0];
46
-
47
- if (!file) {
48
- return res.status(422).json({
49
- error: 'ValidationError',
50
- detail: 'No file provided',
51
- status_code: 422,
52
- timestamp: new Date().toISOString()
53
- });
54
- }
55
-
56
- // Validate file
57
- const fileExtension = validateFileType(file.originalFilename);
58
- validateFileSize(file.size);
59
-
60
- // Read file content
61
- const fileContent = fs.readFileSync(file.filepath, 'utf8');
62
-
63
- // Calculate file hash for duplicate detection
64
- const fileHash = calculateFileHash(fileContent);
65
-
66
- // Check for duplicate uploads by this user
67
- const db = await getDatabase();
68
- const existingDoc = await db.get(
69
- 'SELECT filename, upload_date, chunks_count FROM document_metadata WHERE user_id = ? AND file_hash = ?',
70
- [user.id, fileHash]
71
- );
72
-
73
- if (existingDoc) {
74
- return res.status(200).json({
75
- filename: file.originalFilename,
76
- message: `File already exists (uploaded as '${existingDoc.filename}' on ${existingDoc.upload_date})`,
77
- num_chunks_stored: existingDoc.chunks_count
78
- });
79
- }
80
-
81
- // Parse document text
82
- const text = parseDocument(fileContent, fileExtension);
83
-
84
- if (!text || !text.trim()) {
85
- return res.status(422).json({
86
- error: 'EmptyFileError',
87
- detail: 'File appears to be empty or contains no readable text',
88
- status_code: 422,
89
- timestamp: new Date().toISOString()
90
- });
91
- }
92
-
93
- // Create text chunks
94
- const chunks = chunkText(text);
95
-
96
- if (chunks.length === 0) {
97
- return res.status(422).json({
98
- error: 'EmptyFileError',
99
- detail: 'No text chunks could be created from the file',
100
- status_code: 422,
101
- timestamp: new Date().toISOString()
102
- });
103
- }
104
-
105
- // Generate embeddings
106
- const embeddings = await generateEmbeddings(chunks);
107
-
108
- // Ensure user collection exists
109
- const embeddingDimension = getEmbeddingDimension();
110
- const collectionName = await ensureUserCollectionExists(user.id, embeddingDimension);
111
-
112
- // Prepare payloads for vector store
113
- const payloads = chunks.map(chunk => ({
114
- text: chunk,
115
- source: file.originalFilename,
116
- user_id: user.id,
117
- upload_date: new Date().toISOString()
118
- }));
119
-
120
- // Store in Qdrant
121
- const qdrantClient = getQdrantClient();
122
- await qdrantClient.upsertVectors(collectionName, embeddings, payloads);
123
-
124
- // Store document metadata in database
125
- const docId = uuidv4();
126
- await db.run(
127
- `INSERT INTO document_metadata (id, user_id, filename, original_size, chunks_count, file_hash, upload_date)
128
- VALUES (?, ?, ?, ?, ?, ?, ?)`,
129
- [docId, user.id, file.originalFilename, file.size, chunks.length, fileHash, new Date().toISOString()]
130
- );
131
-
132
- // Clean up temporary file
133
- fs.unlinkSync(file.filepath);
134
-
135
- return res.status(200).json({
136
- filename: file.originalFilename,
137
- message: 'Successfully uploaded, processed, and stored in your personal knowledge base.',
138
- num_chunks_stored: chunks.length
139
- });
140
-
141
- } catch (error) {
142
- console.error('Upload error:', error);
143
-
144
- if (error.message.includes('File size exceeds')) {
145
- return res.status(413).json({
146
- error: 'FileProcessingError',
147
- detail: error.message,
148
- status_code: 413,
149
- timestamp: new Date().toISOString()
150
- });
151
- }
152
-
153
- if (error.message.includes('Unsupported file type')) {
154
- return res.status(422).json({
155
- error: 'InvalidFileTypeError',
156
- detail: error.message,
157
- status_code: 422,
158
- timestamp: new Date().toISOString()
159
- });
160
- }
161
-
162
- return res.status(500).json({
163
- error: 'InternalServerError',
164
- detail: 'An unexpected error occurred during file upload',
165
- status_code: 500,
166
- timestamp: new Date().toISOString()
167
- });
168
- }
169
- }
170
-
171
- export default requireAuth(uploadHandler);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/src/App.tsx CHANGED
@@ -7,6 +7,8 @@ import { AuthProvider } from "@/contexts/AuthContext";
7
  import { ThemeProvider } from "@/contexts/ThemeContext";
8
  import ErrorBoundary from "@/components/ErrorBoundary";
9
  import ProtectedRoute from "@/components/ProtectedRoute";
 
 
10
  import Login from "./pages/Login";
11
  import Register from "./pages/Register";
12
  import Dashboard from "./pages/Dashboard";
 
7
  import { ThemeProvider } from "@/contexts/ThemeContext";
8
  import ErrorBoundary from "@/components/ErrorBoundary";
9
  import ProtectedRoute from "@/components/ProtectedRoute";
10
+
11
+
12
  import Login from "./pages/Login";
13
  import Register from "./pages/Register";
14
  import Dashboard from "./pages/Dashboard";
rag-quest-hub/src/components/ChatInterface.tsx CHANGED
@@ -5,7 +5,7 @@ import { Card } from '@/components/ui/card';
5
  import { Send, Bot, User, Loader2, FileText, ExternalLink, RefreshCw, AlertTriangle, WifiOff } from 'lucide-react';
6
  import { queryAPI, QueryResponse } from '@/lib/api';
7
  import { useToast } from '@/hooks/use-toast';
8
- import { analyzeError, createRetryFunction, showErrorToast, ConnectionMonitor } from '@/lib/errorHandling';
9
 
10
  interface Message {
11
  id: string;
@@ -54,8 +54,10 @@ const ChatInterface: React.FC = () => {
54
  setMessages([welcomeMessage]);
55
 
56
  // Set up connection monitoring
57
- const monitor = ConnectionMonitor.getInstance();
58
- const unsubscribe = monitor.addListener(setIsOnline);
 
 
59
 
60
  return unsubscribe;
61
  }, []);
 
5
  import { Send, Bot, User, Loader2, FileText, ExternalLink, RefreshCw, AlertTriangle, WifiOff } from 'lucide-react';
6
  import { queryAPI, QueryResponse } from '@/lib/api';
7
  import { useToast } from '@/hooks/use-toast';
8
+ import { analyzeError, createRetryFunction, showErrorToast, SimpleHealthChecker } from '@/lib/errorHandling';
9
 
10
  interface Message {
11
  id: string;
 
54
  setMessages([welcomeMessage]);
55
 
56
  // Set up connection monitoring
57
+ const monitor = SimpleHealthChecker.getInstance();
58
+ const unsubscribe = monitor.addListener((status) => {
59
+ setIsOnline(status === 'online');
60
+ });
61
 
62
  return unsubscribe;
63
  }, []);
rag-quest-hub/src/components/ConnectionStatus.tsx DELETED
@@ -1,283 +0,0 @@
1
- import React, { useState, useEffect } from 'react';
2
- import { Wifi, WifiOff, AlertCircle, CheckCircle, Server, Database, Brain, RefreshCw, ChevronDown, ChevronUp } from 'lucide-react';
3
- import { Alert, AlertDescription } from '@/components/ui/alert';
4
- import { Button } from '@/components/ui/button';
5
- import { Badge } from '@/components/ui/badge';
6
- import { Collapsible, CollapsibleContent, CollapsibleTrigger } from '@/components/ui/collapsible';
7
- import { ConnectionMonitor, HealthCheckResponse } from '@/lib/errorHandling';
8
-
9
- interface ConnectionStatusProps {
10
- showWhenOnline?: boolean;
11
- className?: string;
12
- showServiceDetails?: boolean;
13
- }
14
-
15
- const ConnectionStatus: React.FC<ConnectionStatusProps> = ({
16
- showWhenOnline = false,
17
- className = "",
18
- showServiceDetails = true
19
- }) => {
20
- const [isOnline, setIsOnline] = useState(navigator.onLine);
21
- const [serverStatus, setServerStatus] = useState<'checking' | 'online' | 'offline'>('checking');
22
- const [serviceHealth, setServiceHealth] = useState<HealthCheckResponse | null>(null);
23
- const [isDetailsOpen, setIsDetailsOpen] = useState(false);
24
- const [isRetrying, setIsRetrying] = useState(false);
25
-
26
- useEffect(() => {
27
- const monitor = ConnectionMonitor.getInstance();
28
-
29
- // Listen for connection changes
30
- const unsubscribeConnection = monitor.addListener((online) => {
31
- setIsOnline(online);
32
- if (online) {
33
- setServerStatus(monitor.getServerStatus());
34
- } else {
35
- setServerStatus('offline');
36
- setServiceHealth(null);
37
- }
38
- });
39
-
40
- // Listen for health status changes
41
- const unsubscribeHealth = monitor.addHealthListener((health) => {
42
- setServiceHealth(health);
43
- if (health) {
44
- setServerStatus(health.status === 'ok' ? 'online' : 'offline');
45
- }
46
- });
47
-
48
- // Set initial state
49
- setIsOnline(monitor.getStatus());
50
- setServerStatus(monitor.getServerStatus());
51
- setServiceHealth(monitor.getServiceHealth());
52
-
53
- return () => {
54
- unsubscribeConnection();
55
- unsubscribeHealth();
56
- };
57
- }, []);
58
-
59
- const handleRetryConnection = async () => {
60
- setIsRetrying(true);
61
- const monitor = ConnectionMonitor.getInstance();
62
- await monitor.forceHealthCheck();
63
- setIsRetrying(false);
64
- };
65
-
66
- const getServiceIcon = (serviceName: string) => {
67
- switch (serviceName) {
68
- case 'qdrant':
69
- return <Database className="h-3 w-3" />;
70
- case 'ollama':
71
- return <Brain className="h-3 w-3" />;
72
- case 'embedding_model':
73
- return <Server className="h-3 w-3" />;
74
- default:
75
- return <Server className="h-3 w-3" />;
76
- }
77
- };
78
-
79
- const getServiceDisplayName = (serviceName: string) => {
80
- switch (serviceName) {
81
- case 'qdrant':
82
- return 'Vector Database';
83
- case 'ollama':
84
- return 'Language Model';
85
- case 'embedding_model':
86
- return 'Embedding Model';
87
- default:
88
- return serviceName;
89
- }
90
- };
91
-
92
- const getServiceStatusBadge = (status: string) => {
93
- switch (status) {
94
- case 'healthy':
95
- return <Badge variant="default" className="bg-green-500/10 text-green-600 border-green-500/20">Healthy</Badge>;
96
- case 'unhealthy':
97
- return <Badge variant="destructive">Unhealthy</Badge>;
98
- default:
99
- return <Badge variant="secondary">Unknown</Badge>;
100
- }
101
- };
102
-
103
- // Don't show anything if online and showWhenOnline is false
104
- if (isOnline && serverStatus === 'online' && !showWhenOnline) {
105
- return null;
106
- }
107
-
108
- // Compact corner indicator mode when showServiceDetails is false
109
- if (!showServiceDetails) {
110
- const getCompactStatus = () => {
111
- if (!isOnline) {
112
- return { icon: <WifiOff className="h-3 w-3" />, text: 'Offline', color: 'bg-red-500' };
113
- }
114
- if (serverStatus === 'offline') {
115
- return { icon: <AlertCircle className="h-3 w-3" />, text: 'Server Down', color: 'bg-red-500' };
116
- }
117
- if (serverStatus === 'checking') {
118
- return { icon: <RefreshCw className="h-3 w-3 animate-spin" />, text: 'Checking...', color: 'bg-yellow-500' };
119
- }
120
-
121
- const hasUnhealthyServices = serviceHealth?.services &&
122
- Object.values(serviceHealth.services).some(service => service?.status === 'unhealthy');
123
-
124
- if (hasUnhealthyServices) {
125
- return { icon: <AlertCircle className="h-3 w-3" />, text: 'Issues', color: 'bg-yellow-500' };
126
- }
127
-
128
- return { icon: <CheckCircle className="h-3 w-3" />, text: 'Online', color: 'bg-green-500' };
129
- };
130
-
131
- const compactStatus = getCompactStatus();
132
-
133
- return (
134
- <div className={`${className} flex items-center gap-2 px-3 py-2 bg-card/90 backdrop-blur-sm border border-border/50 rounded-full shadow-lg text-xs`}>
135
- <div className={`w-2 h-2 rounded-full ${compactStatus.color}`} />
136
- {compactStatus.icon}
137
- <span className="font-medium">{compactStatus.text}</span>
138
- </div>
139
- );
140
- }
141
-
142
- const getStatusInfo = () => {
143
- if (!isOnline) {
144
- return {
145
- icon: <WifiOff className="h-4 w-4" />,
146
- variant: 'destructive' as const,
147
- title: 'No Internet Connection',
148
- description: 'You are currently offline. Please check your internet connection.',
149
- showRetry: false,
150
- };
151
- }
152
-
153
- if (serverStatus === 'offline') {
154
- return {
155
- icon: <AlertCircle className="h-4 w-4" />,
156
- variant: 'destructive' as const,
157
- title: 'Server Unavailable',
158
- description: 'Cannot connect to the server. Some features may not work properly.',
159
- showRetry: true,
160
- };
161
- }
162
-
163
- if (serverStatus === 'checking') {
164
- return {
165
- icon: <Wifi className="h-4 w-4 animate-pulse" />,
166
- variant: 'default' as const,
167
- title: 'Checking Connection',
168
- description: 'Verifying server connection...',
169
- showRetry: false,
170
- };
171
- }
172
-
173
- // Check if any services are unhealthy
174
- const hasUnhealthyServices = serviceHealth?.services &&
175
- Object.values(serviceHealth.services).some(service => service?.status === 'unhealthy');
176
-
177
- if (hasUnhealthyServices) {
178
- return {
179
- icon: <AlertCircle className="h-4 w-4" />,
180
- variant: 'destructive' as const,
181
- title: 'Service Issues Detected',
182
- description: 'Some services are experiencing issues. Check details below.',
183
- showRetry: true,
184
- };
185
- }
186
-
187
- return {
188
- icon: <CheckCircle className="h-4 w-4" />,
189
- variant: 'default' as const,
190
- title: 'All Systems Operational',
191
- description: serviceHealth ? `Response time: ${serviceHealth.services.qdrant?.responseTime || 0}ms` : 'Connected to server.',
192
- showRetry: false,
193
- };
194
- };
195
-
196
- const statusInfo = getStatusInfo();
197
-
198
- return (
199
- <Alert variant={statusInfo.variant} className={className}>
200
- {statusInfo.icon}
201
- <AlertDescription>
202
- <div className="space-y-3">
203
- <div className="flex items-center justify-between">
204
- <div>
205
- <div className="font-medium">{statusInfo.title}</div>
206
- <div className="text-sm">{statusInfo.description}</div>
207
- {serviceHealth && (
208
- <div className="text-xs text-muted-foreground mt-1">
209
- Last checked: {new Date(serviceHealth.timestamp).toLocaleTimeString()}
210
- </div>
211
- )}
212
- </div>
213
- <div className="flex items-center gap-2">
214
- {statusInfo.showRetry && (
215
- <Button
216
- variant="outline"
217
- size="sm"
218
- onClick={handleRetryConnection}
219
- disabled={serverStatus === 'checking' || isRetrying}
220
- >
221
- <RefreshCw className={`h-3 w-3 mr-1 ${isRetrying ? 'animate-spin' : ''}`} />
222
- {isRetrying ? 'Retrying...' : 'Retry'}
223
- </Button>
224
- )}
225
- {showServiceDetails && serviceHealth && (
226
- <Collapsible open={isDetailsOpen} onOpenChange={setIsDetailsOpen}>
227
- <CollapsibleTrigger asChild>
228
- <Button variant="ghost" size="sm">
229
- {isDetailsOpen ? <ChevronUp className="h-3 w-3" /> : <ChevronDown className="h-3 w-3" />}
230
- </Button>
231
- </CollapsibleTrigger>
232
- </Collapsible>
233
- )}
234
- </div>
235
- </div>
236
-
237
- {/* Service Details */}
238
- {showServiceDetails && serviceHealth && (
239
- <Collapsible open={isDetailsOpen} onOpenChange={setIsDetailsOpen}>
240
- <CollapsibleContent className="space-y-2">
241
- <div className="border-t border-border/50 pt-3">
242
- <div className="text-xs font-medium text-muted-foreground mb-2">Service Status</div>
243
- <div className="grid grid-cols-1 sm:grid-cols-3 gap-2">
244
- {Object.entries(serviceHealth.services).map(([serviceName, service]) => (
245
- <div key={serviceName} className="flex items-center justify-between p-2 bg-muted/30 rounded-md">
246
- <div className="flex items-center gap-2">
247
- {getServiceIcon(serviceName)}
248
- <span className="text-xs font-medium">{getServiceDisplayName(serviceName)}</span>
249
- </div>
250
- <div className="flex flex-col items-end gap-1">
251
- {getServiceStatusBadge(service?.status || 'unknown')}
252
- {service?.responseTime && (
253
- <span className="text-xs text-muted-foreground">{service.responseTime}ms</span>
254
- )}
255
- </div>
256
- </div>
257
- ))}
258
- </div>
259
-
260
- {/* Show errors if any */}
261
- {Object.entries(serviceHealth.services).some(([, service]) => service?.error) && (
262
- <div className="mt-3">
263
- <div className="text-xs font-medium text-muted-foreground mb-1">Service Errors</div>
264
- {Object.entries(serviceHealth.services).map(([serviceName, service]) =>
265
- service?.error && (
266
- <div key={serviceName} className="text-xs text-destructive bg-destructive/10 p-2 rounded-md">
267
- <span className="font-medium">{getServiceDisplayName(serviceName)}:</span> {service.error}
268
- </div>
269
- )
270
- )}
271
- </div>
272
- )}
273
- </div>
274
- </CollapsibleContent>
275
- </Collapsible>
276
- )}
277
- </div>
278
- </AlertDescription>
279
- </Alert>
280
- );
281
- };
282
-
283
- export default ConnectionStatus;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/src/components/ServiceMonitor.tsx DELETED
@@ -1,364 +0,0 @@
1
- import React, { useState, useEffect } from 'react';
2
- import { Card, CardContent, CardDescription, CardHeader, CardTitle } from './ui/card';
3
- import { Badge } from './ui/badge';
4
- import { Button } from './ui/button';
5
- import { Alert, AlertDescription } from './ui/alert';
6
- import { Separator } from './ui/separator';
7
- import { Progress } from './ui/progress';
8
- import { RefreshCw, AlertTriangle, CheckCircle, XCircle, Clock } from 'lucide-react';
9
-
10
- interface ServiceHealth {
11
- name: string;
12
- status: 'healthy' | 'degraded' | 'unhealthy' | 'unknown';
13
- response_time_ms?: number;
14
- error_message?: string;
15
- metadata?: Record<string, any>;
16
- last_check?: string;
17
- }
18
-
19
- interface SystemMetrics {
20
- cpu_percent: number;
21
- memory_percent: number;
22
- disk_percent: number;
23
- disk_free_gb: number;
24
- uptime_seconds: number;
25
- timestamp: string;
26
- }
27
-
28
- interface Alert {
29
- type: string;
30
- severity: 'warning' | 'critical';
31
- message: string;
32
- value: number;
33
- threshold: number;
34
- }
35
-
36
- interface HealthStatus {
37
- status: string;
38
- timestamp: string;
39
- services: Record<string, ServiceHealth>;
40
- system_metrics: SystemMetrics;
41
- alerts: Alert[];
42
- summary: {
43
- total_services: number;
44
- healthy_services: number;
45
- degraded_services: number;
46
- unhealthy_services: number;
47
- };
48
- }
49
-
50
- const ServiceMonitor: React.FC = () => {
51
- const [healthStatus, setHealthStatus] = useState<HealthStatus | null>(null);
52
- const [loading, setLoading] = useState(true);
53
- const [error, setError] = useState<string | null>(null);
54
- const [autoRefresh, setAutoRefresh] = useState(true);
55
-
56
- const fetchHealthStatus = async () => {
57
- try {
58
- setLoading(true);
59
- const response = await fetch('/api/health');
60
- if (!response.ok) {
61
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
62
- }
63
- const data = await response.json();
64
- setHealthStatus(data);
65
- setError(null);
66
- } catch (err) {
67
- setError(err instanceof Error ? err.message : 'Failed to fetch health status');
68
- console.error('Health check failed:', err);
69
- } finally {
70
- setLoading(false);
71
- }
72
- };
73
-
74
- useEffect(() => {
75
- fetchHealthStatus();
76
- }, []);
77
-
78
- useEffect(() => {
79
- if (!autoRefresh) return;
80
-
81
- const interval = setInterval(fetchHealthStatus, 30000); // Refresh every 30 seconds
82
- return () => clearInterval(interval);
83
- }, [autoRefresh]);
84
-
85
- const getStatusIcon = (status: string) => {
86
- switch (status) {
87
- case 'healthy':
88
- return <CheckCircle className="h-4 w-4 text-green-500" />;
89
- case 'degraded':
90
- return <AlertTriangle className="h-4 w-4 text-yellow-500" />;
91
- case 'unhealthy':
92
- return <XCircle className="h-4 w-4 text-red-500" />;
93
- default:
94
- return <Clock className="h-4 w-4 text-gray-500" />;
95
- }
96
- };
97
-
98
- const getStatusBadgeVariant = (status: string) => {
99
- switch (status) {
100
- case 'healthy':
101
- return 'default';
102
- case 'degraded':
103
- return 'secondary';
104
- case 'unhealthy':
105
- return 'destructive';
106
- default:
107
- return 'outline';
108
- }
109
- };
110
-
111
- const formatUptime = (seconds: number) => {
112
- const days = Math.floor(seconds / 86400);
113
- const hours = Math.floor((seconds % 86400) / 3600);
114
- const minutes = Math.floor((seconds % 3600) / 60);
115
-
116
- if (days > 0) {
117
- return `${days}d ${hours}h ${minutes}m`;
118
- } else if (hours > 0) {
119
- return `${hours}h ${minutes}m`;
120
- } else {
121
- return `${minutes}m`;
122
- }
123
- };
124
-
125
- const getProgressColor = (percentage: number, warningThreshold: number, criticalThreshold: number) => {
126
- if (percentage >= criticalThreshold) return 'bg-red-500';
127
- if (percentage >= warningThreshold) return 'bg-yellow-500';
128
- return 'bg-green-500';
129
- };
130
-
131
- if (loading && !healthStatus) {
132
- return (
133
- <Card>
134
- <CardHeader>
135
- <CardTitle className="flex items-center gap-2">
136
- <RefreshCw className="h-5 w-5 animate-spin" />
137
- Loading Service Status...
138
- </CardTitle>
139
- </CardHeader>
140
- </Card>
141
- );
142
- }
143
-
144
- if (error && !healthStatus) {
145
- return (
146
- <Card>
147
- <CardHeader>
148
- <CardTitle className="text-red-600">Service Monitor Error</CardTitle>
149
- </CardHeader>
150
- <CardContent>
151
- <Alert>
152
- <AlertTriangle className="h-4 w-4" />
153
- <AlertDescription>{error}</AlertDescription>
154
- </Alert>
155
- <Button onClick={fetchHealthStatus} className="mt-4">
156
- <RefreshCw className="h-4 w-4 mr-2" />
157
- Retry
158
- </Button>
159
- </CardContent>
160
- </Card>
161
- );
162
- }
163
-
164
- return (
165
- <div className="space-y-6">
166
- {/* Header */}
167
- <div className="flex items-center justify-between">
168
- <div>
169
- <h2 className="text-2xl font-bold">Service Monitor</h2>
170
- <p className="text-muted-foreground">
171
- Last updated: {healthStatus?.timestamp ? new Date(healthStatus.timestamp).toLocaleString() : 'Never'}
172
- </p>
173
- </div>
174
- <div className="flex items-center gap-2">
175
- <Button
176
- variant="outline"
177
- size="sm"
178
- onClick={() => setAutoRefresh(!autoRefresh)}
179
- >
180
- {autoRefresh ? 'Disable Auto-refresh' : 'Enable Auto-refresh'}
181
- </Button>
182
- <Button
183
- variant="outline"
184
- size="sm"
185
- onClick={fetchHealthStatus}
186
- disabled={loading}
187
- >
188
- <RefreshCw className={`h-4 w-4 mr-2 ${loading ? 'animate-spin' : ''}`} />
189
- Refresh
190
- </Button>
191
- </div>
192
- </div>
193
-
194
- {/* Overall Status */}
195
- {healthStatus && (
196
- <Card>
197
- <CardHeader>
198
- <CardTitle className="flex items-center gap-2">
199
- {getStatusIcon(healthStatus.status)}
200
- Overall System Status
201
- <Badge variant={getStatusBadgeVariant(healthStatus.status)}>
202
- {healthStatus.status.toUpperCase()}
203
- </Badge>
204
- </CardTitle>
205
- </CardHeader>
206
- <CardContent>
207
- <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
208
- <div className="text-center">
209
- <div className="text-2xl font-bold text-green-600">
210
- {healthStatus.summary.healthy_services}
211
- </div>
212
- <div className="text-sm text-muted-foreground">Healthy</div>
213
- </div>
214
- <div className="text-center">
215
- <div className="text-2xl font-bold text-yellow-600">
216
- {healthStatus.summary.degraded_services}
217
- </div>
218
- <div className="text-sm text-muted-foreground">Degraded</div>
219
- </div>
220
- <div className="text-center">
221
- <div className="text-2xl font-bold text-red-600">
222
- {healthStatus.summary.unhealthy_services}
223
- </div>
224
- <div className="text-sm text-muted-foreground">Unhealthy</div>
225
- </div>
226
- <div className="text-center">
227
- <div className="text-2xl font-bold">
228
- {healthStatus.summary.total_services}
229
- </div>
230
- <div className="text-sm text-muted-foreground">Total Services</div>
231
- </div>
232
- </div>
233
- </CardContent>
234
- </Card>
235
- )}
236
-
237
- {/* Alerts */}
238
- {healthStatus?.alerts && healthStatus.alerts.length > 0 && (
239
- <Card>
240
- <CardHeader>
241
- <CardTitle className="flex items-center gap-2 text-red-600">
242
- <AlertTriangle className="h-5 w-5" />
243
- Active Alerts
244
- </CardTitle>
245
- </CardHeader>
246
- <CardContent className="space-y-2">
247
- {healthStatus.alerts.map((alert, index) => (
248
- <Alert key={index} className={alert.severity === 'critical' ? 'border-red-500' : 'border-yellow-500'}>
249
- <AlertTriangle className="h-4 w-4" />
250
- <AlertDescription>
251
- <strong>{alert.severity.toUpperCase()}:</strong> {alert.message}
252
- </AlertDescription>
253
- </Alert>
254
- ))}
255
- </CardContent>
256
- </Card>
257
- )}
258
-
259
- {/* System Metrics */}
260
- {healthStatus?.system_metrics && (
261
- <Card>
262
- <CardHeader>
263
- <CardTitle>System Resources</CardTitle>
264
- <CardDescription>
265
- Uptime: {formatUptime(healthStatus.system_metrics.uptime_seconds)}
266
- </CardDescription>
267
- </CardHeader>
268
- <CardContent className="space-y-4">
269
- <div>
270
- <div className="flex justify-between text-sm mb-1">
271
- <span>CPU Usage</span>
272
- <span>{healthStatus.system_metrics.cpu_percent.toFixed(1)}%</span>
273
- </div>
274
- <Progress
275
- value={healthStatus.system_metrics.cpu_percent}
276
- className="h-2"
277
- />
278
- </div>
279
- <div>
280
- <div className="flex justify-between text-sm mb-1">
281
- <span>Memory Usage</span>
282
- <span>{healthStatus.system_metrics.memory_percent.toFixed(1)}%</span>
283
- </div>
284
- <Progress
285
- value={healthStatus.system_metrics.memory_percent}
286
- className="h-2"
287
- />
288
- </div>
289
- <div>
290
- <div className="flex justify-between text-sm mb-1">
291
- <span>Disk Usage</span>
292
- <span>
293
- {healthStatus.system_metrics.disk_percent.toFixed(1)}%
294
- ({healthStatus.system_metrics.disk_free_gb.toFixed(1)} GB free)
295
- </span>
296
- </div>
297
- <Progress
298
- value={healthStatus.system_metrics.disk_percent}
299
- className="h-2"
300
- />
301
- </div>
302
- </CardContent>
303
- </Card>
304
- )}
305
-
306
- {/* Service Details */}
307
- {healthStatus?.services && (
308
- <Card>
309
- <CardHeader>
310
- <CardTitle>Service Details</CardTitle>
311
- </CardHeader>
312
- <CardContent>
313
- <div className="space-y-4">
314
- {Object.entries(healthStatus.services).map(([serviceName, service]) => (
315
- <div key={serviceName} className="border rounded-lg p-4">
316
- <div className="flex items-center justify-between mb-2">
317
- <div className="flex items-center gap-2">
318
- {getStatusIcon(service.status)}
319
- <h4 className="font-semibold capitalize">{serviceName.replace('_', ' ')}</h4>
320
- <Badge variant={getStatusBadgeVariant(service.status)}>
321
- {service.status}
322
- </Badge>
323
- </div>
324
- {service.response_time_ms && (
325
- <span className="text-sm text-muted-foreground">
326
- {service.response_time_ms.toFixed(0)}ms
327
- </span>
328
- )}
329
- </div>
330
-
331
- {service.error_message && (
332
- <Alert className="mb-2">
333
- <AlertTriangle className="h-4 w-4" />
334
- <AlertDescription>{service.error_message}</AlertDescription>
335
- </Alert>
336
- )}
337
-
338
- {service.metadata && (
339
- <div className="text-sm text-muted-foreground">
340
- {Object.entries(service.metadata).map(([key, value]) => (
341
- <div key={key} className="flex justify-between">
342
- <span className="capitalize">{key.replace('_', ' ')}:</span>
343
- <span>{typeof value === 'object' ? JSON.stringify(value) : String(value)}</span>
344
- </div>
345
- ))}
346
- </div>
347
- )}
348
-
349
- {service.last_check && (
350
- <div className="text-xs text-muted-foreground mt-2">
351
- Last checked: {new Date(service.last_check).toLocaleString()}
352
- </div>
353
- )}
354
- </div>
355
- ))}
356
- </div>
357
- </CardContent>
358
- </Card>
359
- )}
360
- </div>
361
- );
362
- };
363
-
364
- export default ServiceMonitor;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/src/components/SimpleConnectionStatus.tsx ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect } from 'react';
2
+ import { SimpleHealthChecker } from '@/lib/errorHandling';
3
+
4
+ export const SimpleConnectionStatus: React.FC = () => {
5
+ const [status, setStatus] = useState<'online' | 'offline' | 'checking'>('checking');
6
+
7
+ useEffect(() => {
8
+ const checker = SimpleHealthChecker.getInstance();
9
+ const unsubscribe = checker.addListener(setStatus);
10
+
11
+ return unsubscribe;
12
+ }, []);
13
+
14
+ const handleRetry = async () => {
15
+ const checker = SimpleHealthChecker.getInstance();
16
+ await checker.forceCheck();
17
+ };
18
+
19
+ const getStatusColor = () => {
20
+ switch (status) {
21
+ case 'online': return '#10b981'; // green
22
+ case 'offline': return '#ef4444'; // red
23
+ case 'checking': return '#f59e0b'; // yellow
24
+ }
25
+ };
26
+
27
+ const getStatusText = () => {
28
+ switch (status) {
29
+ case 'online': return 'Server Online';
30
+ case 'offline': return 'Server Down';
31
+ case 'checking': return 'Checking...';
32
+ }
33
+ };
34
+
35
+ return (
36
+ <div style={{
37
+ position: 'fixed',
38
+ top: '10px',
39
+ right: '10px',
40
+ background: 'rgba(0,0,0,0.9)',
41
+ color: 'white',
42
+ padding: '8px 12px',
43
+ borderRadius: '6px',
44
+ fontSize: '14px',
45
+ zIndex: 10000,
46
+ display: 'flex',
47
+ alignItems: 'center',
48
+ gap: '8px',
49
+ border: `2px solid ${getStatusColor()}`
50
+ }}>
51
+ <div
52
+ style={{
53
+ width: '8px',
54
+ height: '8px',
55
+ borderRadius: '50%',
56
+ backgroundColor: getStatusColor(),
57
+ animation: status === 'checking' ? 'pulse 1.5s infinite' : 'none'
58
+ }}
59
+ />
60
+ <span>{getStatusText()}</span>
61
+ {status === 'offline' && (
62
+ <button
63
+ onClick={handleRetry}
64
+ style={{
65
+ marginLeft: '8px',
66
+ padding: '4px 8px',
67
+ background: '#3b82f6',
68
+ color: 'white',
69
+ border: 'none',
70
+ borderRadius: '4px',
71
+ cursor: 'pointer',
72
+ fontSize: '12px'
73
+ }}
74
+ >
75
+ Retry
76
+ </button>
77
+ )}
78
+ <style>{`
79
+ @keyframes pulse {
80
+ 0%, 100% { opacity: 1; }
81
+ 50% { opacity: 0.5; }
82
+ }
83
+ `}</style>
84
+ </div>
85
+ );
86
+ };
rag-quest-hub/src/pages/Dashboard.tsx CHANGED
@@ -3,7 +3,7 @@ import { Navigate } from 'react-router-dom';
3
  import Header from '@/components/Header';
4
  import DocumentUpload from '@/components/DocumentUpload';
5
  import ChatInterface from '@/components/ChatInterface';
6
- import ConnectionStatus from '@/components/ConnectionStatus';
7
  import { useAuth } from '@/contexts/AuthContext';
8
 
9
  const Dashboard: React.FC = () => {
@@ -29,11 +29,7 @@ const Dashboard: React.FC = () => {
29
  <Header />
30
 
31
  {/* Fixed Corner Status Indicator */}
32
- <ConnectionStatus
33
- className="fixed top-20 right-4 z-50 max-w-xs"
34
- showWhenOnline={true}
35
- showServiceDetails={false}
36
- />
37
 
38
  <div className="container mx-auto p-4 h-[calc(100vh-4rem)]">
39
  <div className="grid grid-cols-1 lg:grid-cols-3 gap-6 h-full">
 
3
  import Header from '@/components/Header';
4
  import DocumentUpload from '@/components/DocumentUpload';
5
  import ChatInterface from '@/components/ChatInterface';
6
+ import { SimpleConnectionStatus } from '@/components/SimpleConnectionStatus';
7
  import { useAuth } from '@/contexts/AuthContext';
8
 
9
  const Dashboard: React.FC = () => {
 
29
  <Header />
30
 
31
  {/* Fixed Corner Status Indicator */}
32
+ <SimpleConnectionStatus />
 
 
 
 
33
 
34
  <div className="container mx-auto p-4 h-[calc(100vh-4rem)]">
35
  <div className="grid grid-cols-1 lg:grid-cols-3 gap-6 h-full">
rag-quest-hub/src/test/connection-status.integration.test.tsx DELETED
@@ -1,133 +0,0 @@
1
- import { describe, it, expect, vi, beforeEach } from 'vitest';
2
- import { render, screen, waitFor } from '@testing-library/react';
3
- import { userEvent } from '@testing-library/user-event';
4
- import React from 'react';
5
- import ConnectionStatus from '@/components/ConnectionStatus';
6
-
7
- // Mock the error handling module
8
- vi.mock('@/lib/errorHandling', () => {
9
- const mockMonitor = {
10
- addListener: vi.fn(() => vi.fn()),
11
- addHealthListener: vi.fn(() => vi.fn()),
12
- getStatus: vi.fn(() => true),
13
- getServerStatus: vi.fn(() => 'online'),
14
- getServiceHealth: vi.fn(() => ({
15
- status: 'ok',
16
- timestamp: '2024-01-01T00:00:00Z',
17
- services: {
18
- qdrant: { status: 'healthy', collections_count: 1, responseTime: 50 },
19
- ollama: { status: 'healthy', model: 'llama3', responseTime: 100 },
20
- embedding_model: { status: 'healthy', embedding_dimension: 384, responseTime: 25 }
21
- }
22
- })),
23
- forceHealthCheck: vi.fn(),
24
- };
25
-
26
- return {
27
- ConnectionMonitor: {
28
- getInstance: vi.fn(() => mockMonitor),
29
- },
30
- };
31
- });
32
-
33
- // Mock toast
34
- vi.mock('@/hooks/use-toast', () => ({
35
- toast: vi.fn(),
36
- }));
37
-
38
- // Mock UI components
39
- vi.mock('@/components/ui/collapsible', () => ({
40
- Collapsible: ({ children, open }: any) => (
41
- <div data-testid="collapsible" style={{ display: open ? 'block' : 'none' }}>
42
- {children}
43
- </div>
44
- ),
45
- CollapsibleContent: ({ children }: any) => (
46
- <div data-testid="collapsible-content">{children}</div>
47
- ),
48
- CollapsibleTrigger: ({ children, asChild, ...props }: any) =>
49
- asChild ? React.cloneElement(children, props) : <button {...props}>{children}</button>,
50
- }));
51
-
52
- describe('ConnectionStatus Integration', () => {
53
- beforeEach(() => {
54
- vi.clearAllMocks();
55
- });
56
-
57
- it('should render connection status when showWhenOnline is true', () => {
58
- render(<ConnectionStatus showWhenOnline={true} showServiceDetails={true} />);
59
-
60
- expect(screen.getByText('All Systems Operational')).toBeInTheDocument();
61
- });
62
-
63
- it('should show service details when expanded', async () => {
64
- const user = userEvent.setup();
65
- render(<ConnectionStatus showWhenOnline={true} showServiceDetails={true} />);
66
-
67
- // Find and click the expand button
68
- const expandButton = screen.getByRole('button');
69
- await user.click(expandButton);
70
-
71
- await waitFor(() => {
72
- expect(screen.getByText('Vector Database')).toBeInTheDocument();
73
- expect(screen.getByText('Language Model')).toBeInTheDocument();
74
- expect(screen.getByText('Embedding Model')).toBeInTheDocument();
75
- });
76
- });
77
-
78
- it('should handle offline state', () => {
79
- const { ConnectionMonitor } = require('@/lib/errorHandling');
80
- const mockMonitor = ConnectionMonitor.getInstance();
81
- mockMonitor.getStatus.mockReturnValue(false);
82
- mockMonitor.getServerStatus.mockReturnValue('offline');
83
-
84
- render(<ConnectionStatus showWhenOnline={true} />);
85
-
86
- expect(screen.getByText('No Internet Connection')).toBeInTheDocument();
87
- });
88
-
89
- it('should handle server unavailable state', () => {
90
- const { ConnectionMonitor } = require('@/lib/errorHandling');
91
- const mockMonitor = ConnectionMonitor.getInstance();
92
- mockMonitor.getStatus.mockReturnValue(true);
93
- mockMonitor.getServerStatus.mockReturnValue('offline');
94
-
95
- render(<ConnectionStatus showWhenOnline={true} />);
96
-
97
- expect(screen.getByText('Server Unavailable')).toBeInTheDocument();
98
- expect(screen.getByText('Retry')).toBeInTheDocument();
99
- });
100
-
101
- it('should handle degraded service state', () => {
102
- const { ConnectionMonitor } = require('@/lib/errorHandling');
103
- const mockMonitor = ConnectionMonitor.getInstance();
104
- mockMonitor.getServiceHealth.mockReturnValue({
105
- status: 'degraded',
106
- timestamp: '2024-01-01T00:00:00Z',
107
- services: {
108
- qdrant: { status: 'healthy', collections_count: 1 },
109
- ollama: { status: 'unhealthy', error: 'Connection timeout' },
110
- embedding_model: { status: 'healthy', embedding_dimension: 384 }
111
- }
112
- });
113
-
114
- render(<ConnectionStatus showWhenOnline={true} showServiceDetails={true} />);
115
-
116
- expect(screen.getByText('Service Issues Detected')).toBeInTheDocument();
117
- });
118
-
119
- it('should call forceHealthCheck when retry button is clicked', async () => {
120
- const user = userEvent.setup();
121
- const { ConnectionMonitor } = require('@/lib/errorHandling');
122
- const mockMonitor = ConnectionMonitor.getInstance();
123
- mockMonitor.getStatus.mockReturnValue(true);
124
- mockMonitor.getServerStatus.mockReturnValue('offline');
125
-
126
- render(<ConnectionStatus showWhenOnline={true} />);
127
-
128
- const retryButton = screen.getByText('Retry');
129
- await user.click(retryButton);
130
-
131
- expect(mockMonitor.forceHealthCheck).toHaveBeenCalled();
132
- });
133
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/src/test/docker.integration.test.ts DELETED
@@ -1,378 +0,0 @@
1
- import { describe, it, expect, beforeEach, afterAll, vi } from 'vitest';
2
-
3
- // Create a separate mock fetch that doesn't interfere with MSW
4
- const mockFetch = vi.fn();
5
-
6
- describe('Docker Compose Service Integration Tests', () => {
7
- const BACKEND_URL = 'http://localhost:8000';
8
- const FRONTEND_URL = 'http://localhost:8080';
9
-
10
- beforeEach(() => {
11
- // Reset fetch mock before each test
12
- mockFetch.mockClear();
13
- });
14
-
15
- afterAll(() => {
16
- vi.restoreAllMocks();
17
- });
18
-
19
- describe('Service Startup and Connectivity', () => {
20
- it('should verify backend service is accessible', async () => {
21
- // Mock successful health check response
22
- mockFetch.mockResolvedValueOnce({
23
- ok: true,
24
- status: 200,
25
- json: async () => ({
26
- status: 'ok',
27
- timestamp: new Date().toISOString(),
28
- services: {
29
- qdrant: { status: 'healthy', collections_count: 1 },
30
- ollama: { status: 'healthy', model: 'llama3' },
31
- embedding_model: { status: 'healthy', embedding_dimension: 384 },
32
- },
33
- }),
34
- });
35
-
36
- const response = await fetch(`${BACKEND_URL}/health`);
37
- const healthData = await response.json();
38
-
39
- expect(response.ok).toBe(true);
40
- expect(healthData.status).toBe('ok');
41
- expect(healthData.services.qdrant.status).toBe('healthy');
42
- expect(healthData.services.ollama.status).toBe('healthy');
43
- expect(healthData.services.embedding_model.status).toBe('healthy');
44
- });
45
-
46
- it('should verify all backend services are running', async () => {
47
- // Mock health check with all services healthy
48
- mockFetch.mockResolvedValueOnce({
49
- ok: true,
50
- status: 200,
51
- json: async () => ({
52
- status: 'ok',
53
- services: {
54
- qdrant: {
55
- status: 'healthy',
56
- collections_count: 1,
57
- version: '1.0.0'
58
- },
59
- ollama: {
60
- status: 'healthy',
61
- model: 'llama3',
62
- available_models: ['llama3']
63
- },
64
- embedding_model: {
65
- status: 'healthy',
66
- embedding_dimension: 384,
67
- model_name: 'sentence-transformers'
68
- },
69
- },
70
- }),
71
- });
72
-
73
- const response = await fetch(`${BACKEND_URL}/health`);
74
- const healthData = await response.json();
75
-
76
- // Verify each service is properly configured
77
- expect(healthData.services.qdrant).toMatchObject({
78
- status: 'healthy',
79
- collections_count: expect.any(Number),
80
- });
81
-
82
- expect(healthData.services.ollama).toMatchObject({
83
- status: 'healthy',
84
- model: expect.any(String),
85
- });
86
-
87
- expect(healthData.services.embedding_model).toMatchObject({
88
- status: 'healthy',
89
- embedding_dimension: expect.any(Number),
90
- });
91
- });
92
-
93
- it('should handle degraded service states', async () => {
94
- // Mock health check with some services degraded
95
- mockFetch.mockResolvedValueOnce({
96
- ok: true,
97
- status: 200,
98
- json: async () => ({
99
- status: 'degraded',
100
- services: {
101
- qdrant: { status: 'healthy', collections_count: 1 },
102
- ollama: {
103
- status: 'unhealthy',
104
- error: 'Connection timeout'
105
- },
106
- embedding_model: { status: 'healthy', embedding_dimension: 384 },
107
- },
108
- }),
109
- });
110
-
111
- const response = await fetch(`${BACKEND_URL}/health`);
112
- const healthData = await response.json();
113
-
114
- expect(healthData.status).toBe('degraded');
115
- expect(healthData.services.ollama.status).toBe('unhealthy');
116
- expect(healthData.services.ollama.error).toBeDefined();
117
- });
118
- });
119
-
120
- describe('Network Configuration', () => {
121
- it('should verify CORS headers are properly configured', async () => {
122
- // Mock preflight OPTIONS request
123
- mockFetch.mockResolvedValueOnce({
124
- ok: true,
125
- status: 200,
126
- headers: new Map([
127
- ['access-control-allow-origin', 'http://localhost:8080'],
128
- ['access-control-allow-methods', 'GET, POST, PUT, DELETE, OPTIONS'],
129
- ['access-control-allow-headers', 'Content-Type, Authorization'],
130
- ['access-control-allow-credentials', 'true'],
131
- ]),
132
- });
133
-
134
- const response = await fetch(`${BACKEND_URL}/upload`, {
135
- method: 'OPTIONS',
136
- headers: {
137
- 'Origin': FRONTEND_URL,
138
- 'Access-Control-Request-Method': 'POST',
139
- 'Access-Control-Request-Headers': 'Content-Type',
140
- },
141
- });
142
-
143
- expect(response.ok).toBe(true);
144
- expect(response.headers.get('access-control-allow-origin')).toBe(FRONTEND_URL);
145
- expect(response.headers.get('access-control-allow-methods')).toContain('POST');
146
- });
147
-
148
- it('should verify API endpoints are accessible from frontend', async () => {
149
- const endpoints = [
150
- { path: '/health', method: 'GET' },
151
- { path: '/upload', method: 'POST' },
152
- { path: '/query', method: 'POST' },
153
- ];
154
-
155
- for (const endpoint of endpoints) {
156
- // Mock successful response for each endpoint
157
- mockFetch.mockResolvedValueOnce({
158
- ok: true,
159
- status: endpoint.method === 'GET' ? 200 : 405, // POST endpoints return 405 without data
160
- json: async () => ({ message: 'Endpoint accessible' }),
161
- });
162
-
163
- const response = await fetch(`${BACKEND_URL}${endpoint.path}`, {
164
- method: endpoint.method,
165
- headers: {
166
- 'Origin': FRONTEND_URL,
167
- 'Content-Type': 'application/json',
168
- },
169
- });
170
-
171
- // Verify endpoint is reachable (even if it returns method not allowed for GET on POST endpoints)
172
- expect(response.status).toBeLessThan(500);
173
- }
174
- });
175
-
176
- it('should handle network timeouts gracefully', async () => {
177
- // Mock network timeout
178
- mockFetch.mockRejectedValueOnce(new Error('Network timeout'));
179
-
180
- try {
181
- await fetch(`${BACKEND_URL}/health`, {
182
- signal: AbortSignal.timeout(1000),
183
- });
184
- } catch (error: any) {
185
- expect(error.message).toContain('timeout');
186
- }
187
- });
188
- });
189
-
190
- describe('Service Communication', () => {
191
- it('should verify backend can communicate with Qdrant', async () => {
192
- // Mock health check showing Qdrant connectivity
193
- mockFetch.mockResolvedValueOnce({
194
- ok: true,
195
- status: 200,
196
- json: async () => ({
197
- status: 'ok',
198
- services: {
199
- qdrant: {
200
- status: 'healthy',
201
- collections_count: 1,
202
- connection_info: {
203
- host: 'qdrant',
204
- port: 6333,
205
- collections: ['knowledge_base'],
206
- },
207
- },
208
- },
209
- }),
210
- });
211
-
212
- const response = await fetch(`${BACKEND_URL}/health`);
213
- const healthData = await response.json();
214
-
215
- expect(healthData.services.qdrant.status).toBe('healthy');
216
- expect(healthData.services.qdrant.collections_count).toBeGreaterThan(0);
217
- });
218
-
219
- it('should verify backend can communicate with Ollama', async () => {
220
- // Mock health check showing Ollama connectivity
221
- mockFetch.mockResolvedValueOnce({
222
- ok: true,
223
- status: 200,
224
- json: async () => ({
225
- status: 'ok',
226
- services: {
227
- ollama: {
228
- status: 'healthy',
229
- model: 'llama3',
230
- connection_info: {
231
- host: 'ollama',
232
- port: 11434,
233
- available_models: ['llama3'],
234
- },
235
- },
236
- },
237
- }),
238
- });
239
-
240
- const response = await fetch(`${BACKEND_URL}/health`);
241
- const healthData = await response.json();
242
-
243
- expect(healthData.services.ollama.status).toBe('healthy');
244
- expect(healthData.services.ollama.model).toBe('llama3');
245
- });
246
- });
247
-
248
- describe('Environment Configuration', () => {
249
- it('should verify environment variables are properly set', async () => {
250
- // Mock health check that includes environment info
251
- mockFetch.mockResolvedValueOnce({
252
- ok: true,
253
- status: 200,
254
- json: async () => ({
255
- status: 'ok',
256
- environment: {
257
- cors_origins: ['http://localhost:8080', 'http://127.0.0.1:8080'],
258
- qdrant_host: 'qdrant',
259
- ollama_host: 'ollama',
260
- debug_mode: false,
261
- },
262
- }),
263
- });
264
-
265
- const response = await fetch(`${BACKEND_URL}/health`);
266
- const healthData = await response.json();
267
-
268
- expect(healthData.environment.cors_origins).toContain(FRONTEND_URL);
269
- expect(healthData.environment.qdrant_host).toBe('qdrant');
270
- expect(healthData.environment.ollama_host).toBe('ollama');
271
- });
272
-
273
- it('should handle different deployment environments', async () => {
274
- const environments = ['development', 'production', 'testing'];
275
-
276
- for (const env of environments) {
277
- mockFetch.mockResolvedValueOnce({
278
- ok: true,
279
- status: 200,
280
- json: async () => ({
281
- status: 'ok',
282
- environment: {
283
- mode: env,
284
- cors_origins: env === 'production'
285
- ? ['https://app.example.com']
286
- : ['http://localhost:8080'],
287
- },
288
- }),
289
- });
290
-
291
- const response = await fetch(`${BACKEND_URL}/health`);
292
- const healthData = await response.json();
293
-
294
- expect(healthData.environment.mode).toBe(env);
295
- expect(Array.isArray(healthData.environment.cors_origins)).toBe(true);
296
- }
297
- });
298
- });
299
-
300
- describe('Performance and Reliability', () => {
301
- it('should verify service startup time is reasonable', async () => {
302
- const startTime = Date.now();
303
-
304
- // Mock health check response
305
- mockFetch.mockResolvedValueOnce({
306
- ok: true,
307
- status: 200,
308
- json: async () => ({
309
- status: 'ok',
310
- startup_time: '2.5s',
311
- services: {
312
- qdrant: { status: 'healthy', startup_time: '1.2s' },
313
- ollama: { status: 'healthy', startup_time: '2.1s' },
314
- embedding_model: { status: 'healthy', startup_time: '0.8s' },
315
- },
316
- }),
317
- });
318
-
319
- const response = await fetch(`${BACKEND_URL}/health`);
320
- const healthData = await response.json();
321
- const responseTime = Date.now() - startTime;
322
-
323
- expect(response.ok).toBe(true);
324
- expect(responseTime).toBeLessThan(5000); // Should respond within 5 seconds
325
- expect(healthData.startup_time).toBeDefined();
326
- });
327
-
328
- it('should verify service health monitoring works', async () => {
329
- // Mock multiple health checks to simulate monitoring
330
- const healthChecks = [
331
- { status: 'ok', timestamp: '2024-01-01T10:00:00Z' },
332
- { status: 'ok', timestamp: '2024-01-01T10:01:00Z' },
333
- { status: 'degraded', timestamp: '2024-01-01T10:02:00Z' },
334
- { status: 'ok', timestamp: '2024-01-01T10:03:00Z' },
335
- ];
336
-
337
- for (const check of healthChecks) {
338
- mockFetch.mockResolvedValueOnce({
339
- ok: true,
340
- status: 200,
341
- json: async () => check,
342
- });
343
-
344
- const response = await fetch(`${BACKEND_URL}/health`);
345
- const healthData = await response.json();
346
-
347
- expect(healthData.status).toBe(check.status);
348
- expect(healthData.timestamp).toBe(check.timestamp);
349
- }
350
- });
351
-
352
- it('should handle service recovery after failures', async () => {
353
- // Simulate service failure and recovery
354
- const scenarios = [
355
- { ok: false, status: 503 }, // Service unavailable
356
- { ok: false, status: 503 }, // Still down
357
- { ok: true, status: 200, json: async () => ({ status: 'ok' }) }, // Recovered
358
- ];
359
-
360
- for (const scenario of scenarios) {
361
- mockFetch.mockResolvedValueOnce(scenario);
362
-
363
- try {
364
- const response = await fetch(`${BACKEND_URL}/health`);
365
- if (response.ok) {
366
- const healthData = await response.json();
367
- expect(healthData.status).toBe('ok');
368
- } else {
369
- expect(response.status).toBe(503);
370
- }
371
- } catch (error) {
372
- // Expected for failed scenarios
373
- expect(error).toBeDefined();
374
- }
375
- }
376
- });
377
- });
378
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/src/test/health-check-demo.ts DELETED
@@ -1,86 +0,0 @@
1
- /**
2
- * Health Check Demo
3
- *
4
- * This file demonstrates the health check functionality implemented for task 6.
5
- * It shows how the ConnectionMonitor works with exponential backoff retry logic.
6
- */
7
-
8
- import { ConnectionMonitor, HealthCheckResponse } from '@/lib/errorHandling';
9
-
10
- // Demo function to show health check functionality
11
- export async function demoHealthCheck() {
12
- console.log('=== Health Check Demo ===');
13
-
14
- const monitor = ConnectionMonitor.getInstance();
15
-
16
- // Show current status
17
- console.log('Current online status:', monitor.getStatus());
18
- console.log('Current server status:', monitor.getServerStatus());
19
-
20
- // Add a health listener to see status changes
21
- const unsubscribeHealth = monitor.addHealthListener((health: HealthCheckResponse | null) => {
22
- if (health) {
23
- console.log('Health status updated:', {
24
- status: health.status,
25
- timestamp: health.timestamp,
26
- services: Object.keys(health.services).map(service => ({
27
- name: service,
28
- status: health.services[service as keyof typeof health.services]?.status,
29
- error: health.services[service as keyof typeof health.services]?.error
30
- }))
31
- });
32
- } else {
33
- console.log('Health status: null (offline or checking)');
34
- }
35
- });
36
-
37
- // Force a health check
38
- console.log('Forcing health check...');
39
- await monitor.forceHealthCheck();
40
-
41
- // Show final status
42
- console.log('Final server status:', monitor.getServerStatus());
43
- const serviceHealth = monitor.getServiceHealth();
44
- if (serviceHealth) {
45
- console.log('Service health:', {
46
- status: serviceHealth.status,
47
- services: Object.keys(serviceHealth.services).length
48
- });
49
- }
50
-
51
- // Cleanup
52
- unsubscribeHealth();
53
-
54
- console.log('=== Demo Complete ===');
55
- }
56
-
57
- // Features implemented for task 6:
58
- export const IMPLEMENTED_FEATURES = {
59
- healthCheck: {
60
- description: 'Frontend health check for backend connectivity',
61
- implementation: 'ConnectionMonitor.checkServerConnection() and forceHealthCheck()',
62
- status: 'COMPLETED'
63
- },
64
- serviceStatusIndicators: {
65
- description: 'Service status indicators in the UI',
66
- implementation: 'ConnectionStatus component with detailed service breakdown',
67
- status: 'COMPLETED'
68
- },
69
- exponentialBackoff: {
70
- description: 'Connection retry logic with exponential backoff',
71
- implementation: 'ConnectionMonitor with configurable retry attempts and delays',
72
- status: 'COMPLETED'
73
- },
74
- serviceAvailability: {
75
- description: 'Display service availability status to users',
76
- implementation: 'Real-time status updates with service health details',
77
- status: 'COMPLETED'
78
- },
79
- requirements: {
80
- '4.5': 'Service status monitoring - COMPLETED',
81
- '3.4': 'Connection error handling - COMPLETED',
82
- '3.5': 'User feedback for connection issues - COMPLETED'
83
- }
84
- };
85
-
86
- console.log('Task 6 Implementation Summary:', IMPLEMENTED_FEATURES);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/src/test/health-check.test.ts DELETED
@@ -1,226 +0,0 @@
1
- import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
2
- import { ConnectionMonitor, HealthCheckResponse } from '@/lib/errorHandling';
3
-
4
- // Mock fetch globally
5
- const mockFetch = vi.fn();
6
- global.fetch = mockFetch;
7
-
8
- // Mock toast
9
- vi.mock('@/hooks/use-toast', () => ({
10
- toast: vi.fn(),
11
- }));
12
-
13
- describe('ConnectionMonitor Health Check', () => {
14
- let monitor: ConnectionMonitor;
15
-
16
- beforeEach(() => {
17
- // Reset singleton instance
18
- (ConnectionMonitor as any).instance = null;
19
- monitor = ConnectionMonitor.getInstance();
20
- vi.clearAllMocks();
21
- });
22
-
23
- afterEach(() => {
24
- monitor.destroy();
25
- });
26
-
27
- it('should perform health check and return service status', async () => {
28
- const mockHealthResponse: HealthCheckResponse = {
29
- status: 'ok',
30
- timestamp: '2024-01-01T00:00:00Z',
31
- services: {
32
- qdrant: {
33
- status: 'healthy',
34
- collections_count: 1,
35
- },
36
- ollama: {
37
- status: 'healthy',
38
- model: 'llama3',
39
- },
40
- embedding_model: {
41
- status: 'healthy',
42
- embedding_dimension: 384,
43
- },
44
- },
45
- };
46
-
47
- mockFetch.mockResolvedValueOnce({
48
- ok: true,
49
- json: () => Promise.resolve(mockHealthResponse),
50
- });
51
-
52
- const isHealthy = await monitor.checkServerConnection();
53
- expect(isHealthy).toBe(true);
54
- expect(mockFetch).toHaveBeenCalledWith('/api/health', {
55
- method: 'GET',
56
- cache: 'no-cache',
57
- signal: expect.any(AbortSignal),
58
- });
59
- });
60
-
61
- it('should handle health check failure', async () => {
62
- mockFetch.mockRejectedValueOnce(new Error('Network error'));
63
-
64
- const isHealthy = await monitor.checkServerConnection();
65
- expect(isHealthy).toBe(false);
66
- });
67
-
68
- it('should implement exponential backoff on connection failures', async () => {
69
- vi.useFakeTimers();
70
-
71
- // Mock failed responses for all attempts
72
- mockFetch
73
- .mockRejectedValueOnce(new Error('Connection failed'))
74
- .mockRejectedValueOnce(new Error('Connection failed'))
75
- .mockRejectedValueOnce(new Error('Connection failed'))
76
- .mockRejectedValueOnce(new Error('Connection failed'))
77
- .mockRejectedValueOnce(new Error('Connection failed'))
78
- .mockRejectedValueOnce(new Error('Connection failed'));
79
-
80
- // Start health check (this will trigger the initial attempt)
81
- const healthCheckPromise = monitor.forceHealthCheck();
82
-
83
- // Wait for initial attempt
84
- await vi.runOnlyPendingTimersAsync();
85
-
86
- // Fast-forward through retry attempts
87
- await vi.advanceTimersByTimeAsync(1000); // First retry after 1s
88
- await vi.runOnlyPendingTimersAsync();
89
-
90
- await vi.advanceTimersByTimeAsync(2000); // Second retry after 2s
91
- await vi.runOnlyPendingTimersAsync();
92
-
93
- await vi.advanceTimersByTimeAsync(4000); // Third retry after 4s
94
- await vi.runOnlyPendingTimersAsync();
95
-
96
- await healthCheckPromise;
97
-
98
- // Should have made multiple attempts with exponential backoff
99
- expect(mockFetch).toHaveBeenCalledTimes(4); // Initial + 3 retries
100
-
101
- vi.useRealTimers();
102
- });
103
-
104
- it('should notify health listeners when status changes', async () => {
105
- const healthListener = vi.fn();
106
- const unsubscribe = monitor.addHealthListener(healthListener);
107
-
108
- const mockHealthResponse: HealthCheckResponse = {
109
- status: 'degraded',
110
- timestamp: '2024-01-01T00:00:00Z',
111
- services: {
112
- qdrant: {
113
- status: 'healthy',
114
- collections_count: 1,
115
- },
116
- ollama: {
117
- status: 'unhealthy',
118
- error: 'Model not loaded',
119
- },
120
- embedding_model: {
121
- status: 'healthy',
122
- embedding_dimension: 384,
123
- },
124
- },
125
- };
126
-
127
- mockFetch.mockResolvedValueOnce({
128
- ok: true,
129
- json: () => Promise.resolve(mockHealthResponse),
130
- });
131
-
132
- await monitor.forceHealthCheck();
133
-
134
- // Should be called with the health response (after initial null call)
135
- expect(healthListener).toHaveBeenCalledWith(expect.objectContaining({
136
- status: 'degraded',
137
- services: expect.objectContaining({
138
- ollama: expect.objectContaining({
139
- status: 'unhealthy',
140
- error: 'Model not loaded'
141
- })
142
- })
143
- }));
144
-
145
- unsubscribe();
146
- });
147
-
148
- it('should handle degraded service status', async () => {
149
- const mockHealthResponse: HealthCheckResponse = {
150
- status: 'degraded',
151
- timestamp: '2024-01-01T00:00:00Z',
152
- services: {
153
- qdrant: {
154
- status: 'healthy',
155
- collections_count: 1,
156
- },
157
- ollama: {
158
- status: 'unhealthy',
159
- error: 'Connection timeout',
160
- },
161
- embedding_model: {
162
- status: 'healthy',
163
- embedding_dimension: 384,
164
- },
165
- },
166
- };
167
-
168
- mockFetch.mockResolvedValueOnce({
169
- ok: true,
170
- json: () => Promise.resolve(mockHealthResponse),
171
- });
172
-
173
- await monitor.forceHealthCheck();
174
-
175
- const serviceHealth = monitor.getServiceHealth();
176
- expect(serviceHealth?.status).toBe('degraded');
177
- expect(serviceHealth?.services.ollama?.status).toBe('unhealthy');
178
- expect(serviceHealth?.services.ollama?.error).toBe('Connection timeout');
179
- });
180
-
181
- it('should reset retry attempts on successful connection', async () => {
182
- vi.useFakeTimers();
183
-
184
- // First, simulate failures then success
185
- mockFetch
186
- .mockRejectedValueOnce(new Error('Failed'))
187
- .mockRejectedValueOnce(new Error('Failed'))
188
- .mockResolvedValueOnce({
189
- ok: true,
190
- json: () => Promise.resolve({
191
- status: 'ok',
192
- timestamp: '2024-01-01T00:00:00Z',
193
- services: {},
194
- }),
195
- });
196
-
197
- // Start health check
198
- const healthCheckPromise = monitor.forceHealthCheck();
199
-
200
- // Fast-forward through retries
201
- await vi.runOnlyPendingTimersAsync();
202
- await vi.advanceTimersByTimeAsync(1000);
203
- await vi.runOnlyPendingTimersAsync();
204
- await vi.advanceTimersByTimeAsync(2000);
205
- await vi.runOnlyPendingTimersAsync();
206
-
207
- await healthCheckPromise;
208
-
209
- // Now force another health check - should succeed immediately
210
- mockFetch.mockResolvedValueOnce({
211
- ok: true,
212
- json: () => Promise.resolve({
213
- status: 'ok',
214
- timestamp: '2024-01-01T00:00:00Z',
215
- services: {},
216
- }),
217
- });
218
-
219
- await monitor.forceHealthCheck();
220
-
221
- // Should have reset retry attempts and be online
222
- expect(monitor.getServerStatus()).toBe('online');
223
-
224
- vi.useRealTimers();
225
- });
226
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag-quest-hub/vercel.json CHANGED
@@ -1,46 +1,11 @@
1
  {
2
- "version": 2,
3
- "buildCommand": "npm run build",
4
- "outputDirectory": "dist",
5
- "installCommand": "npm install",
6
- "framework": "vite",
7
  "rewrites": [
8
  {
9
- "source": "/api/(.*)",
10
- "destination": "/api/$1"
11
- },
12
- {
13
- "source": "/((?!api/).*)",
14
  "destination": "/index.html"
15
  }
16
  ],
17
- "headers": [
18
- {
19
- "source": "/api/(.*)",
20
- "headers": [
21
- {
22
- "key": "Access-Control-Allow-Origin",
23
- "value": "*"
24
- },
25
- {
26
- "key": "Access-Control-Allow-Methods",
27
- "value": "GET, POST, PUT, DELETE, OPTIONS"
28
- },
29
- {
30
- "key": "Access-Control-Allow-Headers",
31
- "value": "Content-Type, Authorization"
32
- }
33
- ]
34
- }
35
- ],
36
- "functions": {
37
- "api/**/*.js": {
38
- "runtime": "nodejs18.x"
39
- }
40
- },
41
- "env": {
42
- "VITE_API_BASE_URL": "/api",
43
- "VITE_API_TIMEOUT": "30000",
44
- "VITE_QUERY_TIMEOUT": "60000"
45
- }
46
  }
 
1
  {
 
 
 
 
 
2
  "rewrites": [
3
  {
4
+ "source": "/(.*)",
 
 
 
 
5
  "destination": "/index.html"
6
  }
7
  ],
8
+ "buildCommand": "npm run build",
9
+ "outputDirectory": "dist",
10
+ "installCommand": "npm install"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  }
railway-database-config.py DELETED
@@ -1,101 +0,0 @@
1
- """
2
- Railway Database Configuration Helper
3
- Handles both PostgreSQL (Railway managed) and SQLite fallback
4
- """
5
-
6
- import os
7
- import logging
8
- from urllib.parse import urlparse
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
- def get_railway_database_url():
13
- """
14
- Get the appropriate database URL for Railway deployment.
15
- Prioritizes Railway PostgreSQL, falls back to SQLite.
16
- """
17
- # Check for Railway PostgreSQL URL
18
- railway_db_url = os.getenv('DATABASE_URL')
19
-
20
- if railway_db_url and railway_db_url.startswith('postgresql'):
21
- logger.info("Using Railway PostgreSQL database")
22
- # Convert postgresql:// to postgresql+asyncpg:// for async support
23
- if railway_db_url.startswith('postgresql://'):
24
- railway_db_url = railway_db_url.replace('postgresql://', 'postgresql+asyncpg://', 1)
25
- return railway_db_url
26
-
27
- # Fallback to SQLite
28
- sqlite_url = "sqlite+aiosqlite:///./data/knowledge_assistant.db"
29
- logger.info("Using SQLite database fallback")
30
- return sqlite_url
31
-
32
- def get_railway_environment_config():
33
- """
34
- Get Railway-specific environment configuration
35
- """
36
- config = {
37
- 'database_url': get_railway_database_url(),
38
- 'port': int(os.getenv('PORT', 8000)),
39
- 'cors_origins': os.getenv('CORS_ORIGINS', '').split(',') if os.getenv('CORS_ORIGINS') else ['*'],
40
- 'jwt_secret': os.getenv('JWT_SECRET', 'railway-default-secret-change-in-production'),
41
- 'jwt_lifetime': int(os.getenv('JWT_LIFETIME_SECONDS', 3600)),
42
- 'user_registration_enabled': os.getenv('USER_REGISTRATION_ENABLED', 'true').lower() == 'true',
43
- 'email_verification_required': os.getenv('EMAIL_VERIFICATION_REQUIRED', 'false').lower() == 'true',
44
- }
45
-
46
- # External services configuration
47
- config.update({
48
- 'qdrant_host': os.getenv('QDRANT_HOST', 'localhost'),
49
- 'qdrant_port': int(os.getenv('QDRANT_PORT', 6333)),
50
- 'ollama_host': os.getenv('OLLAMA_HOST', 'localhost'),
51
- 'ollama_port': int(os.getenv('OLLAMA_PORT', 11434)),
52
- 'ollama_model': os.getenv('OLLAMA_MODEL', 'llama3.2:1b'),
53
- })
54
-
55
- # Optional external service URLs (for hybrid deployment)
56
- if os.getenv('QDRANT_CLOUD_URL'):
57
- config['qdrant_cloud_url'] = os.getenv('QDRANT_CLOUD_URL')
58
- config['qdrant_api_key'] = os.getenv('QDRANT_API_KEY')
59
-
60
- if os.getenv('OPENAI_API_KEY'):
61
- config['openai_api_key'] = os.getenv('OPENAI_API_KEY')
62
- config['use_openai'] = os.getenv('USE_OPENAI_INSTEAD_OF_OLLAMA', 'false').lower() == 'true'
63
-
64
- return config
65
-
66
- def validate_railway_config():
67
- """
68
- Validate Railway configuration and log warnings for missing required variables
69
- """
70
- required_vars = ['JWT_SECRET']
71
- missing_vars = []
72
-
73
- for var in required_vars:
74
- if not os.getenv(var):
75
- missing_vars.append(var)
76
-
77
- if missing_vars:
78
- logger.warning(f"Missing required environment variables: {', '.join(missing_vars)}")
79
- return False
80
-
81
- # Validate JWT secret strength
82
- jwt_secret = os.getenv('JWT_SECRET', '')
83
- if len(jwt_secret) < 32:
84
- logger.warning("JWT_SECRET should be at least 32 characters long for security")
85
-
86
- return True
87
-
88
- if __name__ == "__main__":
89
- # Test configuration
90
- logging.basicConfig(level=logging.INFO)
91
- config = get_railway_environment_config()
92
- is_valid = validate_railway_config()
93
-
94
- print("Railway Configuration:")
95
- for key, value in config.items():
96
- if 'secret' in key.lower() or 'key' in key.lower():
97
- print(f" {key}: {'*' * len(str(value)) if value else 'NOT SET'}")
98
- else:
99
- print(f" {key}: {value}")
100
-
101
- print(f"\nConfiguration valid: {is_valid}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
railway-health-check.sh DELETED
@@ -1,318 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Railway Health Check Script
4
- # Validates deployment health and service connectivity
5
-
6
- set -e
7
-
8
- # Colors for output
9
- RED='\033[0;31m'
10
- GREEN='\033[0;32m'
11
- YELLOW='\033[1;33m'
12
- BLUE='\033[0;34m'
13
- NC='\033[0m' # No Color
14
-
15
- # Configuration
16
- BACKEND_SERVICE="backend"
17
- FRONTEND_SERVICE="frontend"
18
- TIMEOUT=30
19
-
20
- # Logging functions
21
- log() {
22
- echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"
23
- }
24
-
25
- error() {
26
- echo -e "${RED}[ERROR]${NC} $1" >&2
27
- }
28
-
29
- success() {
30
- echo -e "${GREEN}[SUCCESS]${NC} $1"
31
- }
32
-
33
- warning() {
34
- echo -e "${YELLOW}[WARNING]${NC} $1"
35
- }
36
-
37
- # Check if Railway CLI is available
38
- check_railway_cli() {
39
- if ! command -v railway &> /dev/null; then
40
- error "Railway CLI is not installed"
41
- exit 1
42
- fi
43
- }
44
-
45
- # Get service URL
46
- get_service_url() {
47
- local service_name=$1
48
- railway service use "$service_name" &> /dev/null
49
- local domain=$(railway domain 2>/dev/null | head -n1)
50
- if [ -n "$domain" ]; then
51
- echo "https://$domain"
52
- else
53
- echo ""
54
- fi
55
- }
56
-
57
- # Test HTTP endpoint
58
- test_endpoint() {
59
- local url=$1
60
- local description=$2
61
- local expected_status=${3:-200}
62
-
63
- log "Testing $description: $url"
64
-
65
- local response=$(curl -s -w "%{http_code}" -o /dev/null --max-time $TIMEOUT "$url" 2>/dev/null || echo "000")
66
-
67
- if [ "$response" = "$expected_status" ]; then
68
- success "$description is healthy (HTTP $response)"
69
- return 0
70
- else
71
- error "$description failed (HTTP $response)"
72
- return 1
73
- fi
74
- }
75
-
76
- # Test JSON API endpoint
77
- test_json_endpoint() {
78
- local url=$1
79
- local description=$2
80
-
81
- log "Testing $description: $url"
82
-
83
- local response=$(curl -s --max-time $TIMEOUT -H "Accept: application/json" "$url" 2>/dev/null)
84
- local status=$?
85
-
86
- if [ $status -eq 0 ] && echo "$response" | jq . &> /dev/null; then
87
- success "$description returned valid JSON"
88
- return 0
89
- else
90
- error "$description failed or returned invalid JSON"
91
- return 1
92
- fi
93
- }
94
-
95
- # Test backend health
96
- test_backend_health() {
97
- log "Testing backend service health..."
98
-
99
- local backend_url=$(get_service_url "$BACKEND_SERVICE")
100
- if [ -z "$backend_url" ]; then
101
- error "Backend URL not available"
102
- return 1
103
- fi
104
-
105
- log "Backend URL: $backend_url"
106
-
107
- # Test basic connectivity
108
- test_endpoint "$backend_url" "Backend root endpoint" || return 1
109
-
110
- # Test health endpoint
111
- test_json_endpoint "$backend_url/health" "Backend health endpoint" || return 1
112
-
113
- # Test API docs
114
- test_endpoint "$backend_url/docs" "Backend API documentation" || return 1
115
-
116
- # Test OpenAPI spec
117
- test_json_endpoint "$backend_url/openapi.json" "Backend OpenAPI specification" || return 1
118
-
119
- success "Backend service is healthy"
120
- return 0
121
- }
122
-
123
- # Test frontend health
124
- test_frontend_health() {
125
- log "Testing frontend service health..."
126
-
127
- local frontend_url=$(get_service_url "$FRONTEND_SERVICE")
128
- if [ -z "$frontend_url" ]; then
129
- error "Frontend URL not available"
130
- return 1
131
- fi
132
-
133
- log "Frontend URL: $frontend_url"
134
-
135
- # Test basic connectivity
136
- test_endpoint "$frontend_url" "Frontend application" || return 1
137
-
138
- # Test static assets (common paths)
139
- test_endpoint "$frontend_url/assets" "Frontend assets" 404 # 404 is expected for directory listing
140
-
141
- success "Frontend service is healthy"
142
- return 0
143
- }
144
-
145
- # Test service connectivity
146
- test_service_connectivity() {
147
- log "Testing service connectivity..."
148
-
149
- local backend_url=$(get_service_url "$BACKEND_SERVICE")
150
- local frontend_url=$(get_service_url "$FRONTEND_SERVICE")
151
-
152
- if [ -z "$backend_url" ] || [ -z "$frontend_url" ]; then
153
- warning "Cannot test connectivity - missing service URLs"
154
- return 1
155
- fi
156
-
157
- # Test CORS by checking if frontend can reach backend
158
- # This is a simplified test - in reality, CORS is tested by the browser
159
- log "Testing backend accessibility from frontend domain..."
160
-
161
- # Check if backend allows the frontend origin
162
- local cors_test=$(curl -s -H "Origin: $frontend_url" -H "Access-Control-Request-Method: GET" -X OPTIONS "$backend_url/health" -w "%{http_code}" -o /dev/null 2>/dev/null || echo "000")
163
-
164
- if [ "$cors_test" = "200" ] || [ "$cors_test" = "204" ]; then
165
- success "CORS configuration appears correct"
166
- else
167
- warning "CORS configuration may need adjustment (HTTP $cors_test)"
168
- fi
169
-
170
- return 0
171
- }
172
-
173
- # Test database connectivity
174
- test_database_connectivity() {
175
- log "Testing database connectivity..."
176
-
177
- local backend_url=$(get_service_url "$BACKEND_SERVICE")
178
- if [ -z "$backend_url" ]; then
179
- error "Backend URL not available for database test"
180
- return 1
181
- fi
182
-
183
- # Test database health through backend API
184
- # This assumes the backend has a database health check endpoint
185
- local db_health=$(curl -s --max-time $TIMEOUT "$backend_url/health" 2>/dev/null | jq -r '.database // "unknown"' 2>/dev/null || echo "unknown")
186
-
187
- if [ "$db_health" = "healthy" ] || [ "$db_health" = "ok" ]; then
188
- success "Database connectivity is healthy"
189
- elif [ "$db_health" = "unknown" ]; then
190
- warning "Database health status unknown"
191
- else
192
- error "Database connectivity issues detected"
193
- return 1
194
- fi
195
-
196
- return 0
197
- }
198
-
199
- # Generate health report
200
- generate_health_report() {
201
- log "Generating health report..."
202
-
203
- local backend_url=$(get_service_url "$BACKEND_SERVICE")
204
- local frontend_url=$(get_service_url "$FRONTEND_SERVICE")
205
-
206
- echo ""
207
- echo "=== Railway Deployment Health Report ==="
208
- echo "Generated: $(date)"
209
- echo ""
210
-
211
- if [ -n "$backend_url" ]; then
212
- echo "Backend Service:"
213
- echo " URL: $backend_url"
214
- echo " Health: $backend_url/health"
215
- echo " API Docs: $backend_url/docs"
216
- else
217
- echo "Backend Service: NOT AVAILABLE"
218
- fi
219
-
220
- echo ""
221
-
222
- if [ -n "$frontend_url" ]; then
223
- echo "Frontend Service:"
224
- echo " URL: $frontend_url"
225
- else
226
- echo "Frontend Service: NOT AVAILABLE"
227
- fi
228
-
229
- echo ""
230
- echo "Service Status:"
231
- railway service use "$BACKEND_SERVICE" &> /dev/null
232
- echo " Backend: $(railway status --json 2>/dev/null | jq -r '.status // "unknown"' 2>/dev/null || echo "unknown")"
233
-
234
- railway service use "$FRONTEND_SERVICE" &> /dev/null
235
- echo " Frontend: $(railway status --json 2>/dev/null | jq -r '.status // "unknown"' 2>/dev/null || echo "unknown")"
236
-
237
- echo ""
238
- echo "Recent Logs (last 10 lines):"
239
- echo "Backend:"
240
- railway service use "$BACKEND_SERVICE" &> /dev/null
241
- railway logs --tail 10 2>/dev/null | sed 's/^/ /' || echo " Logs not available"
242
-
243
- echo ""
244
- echo "Frontend:"
245
- railway service use "$FRONTEND_SERVICE" &> /dev/null
246
- railway logs --tail 10 2>/dev/null | sed 's/^/ /' || echo " Logs not available"
247
- }
248
-
249
- # Main health check function
250
- main() {
251
- log "Starting Railway deployment health check..."
252
-
253
- check_railway_cli
254
-
255
- local failed_tests=0
256
-
257
- # Run health tests
258
- test_backend_health || ((failed_tests++))
259
- test_frontend_health || ((failed_tests++))
260
- test_service_connectivity || ((failed_tests++))
261
- test_database_connectivity || ((failed_tests++))
262
-
263
- # Generate report
264
- generate_health_report
265
-
266
- echo ""
267
- if [ $failed_tests -eq 0 ]; then
268
- success "All health checks passed!"
269
- exit 0
270
- else
271
- error "$failed_tests health check(s) failed"
272
- echo ""
273
- echo "Troubleshooting tips:"
274
- echo "1. Check Railway dashboard for service status"
275
- echo "2. Review service logs: railway logs --service <service-name>"
276
- echo "3. Verify environment variables: railway variables"
277
- echo "4. Check resource usage and limits"
278
- echo "5. Ensure all services are deployed and running"
279
- exit 1
280
- fi
281
- }
282
-
283
- # Handle script arguments
284
- case "${1:-}" in
285
- --help|-h)
286
- echo "Railway Health Check Script"
287
- echo ""
288
- echo "Usage: $0 [options]"
289
- echo ""
290
- echo "Options:"
291
- echo " --help, -h Show this help message"
292
- echo " --backend-only Check only backend service"
293
- echo " --frontend-only Check only frontend service"
294
- echo " --report-only Generate health report only"
295
- echo ""
296
- exit 0
297
- ;;
298
- --backend-only)
299
- check_railway_cli
300
- test_backend_health
301
- ;;
302
- --frontend-only)
303
- check_railway_cli
304
- test_frontend_health
305
- ;;
306
- --report-only)
307
- check_railway_cli
308
- generate_health_report
309
- ;;
310
- "")
311
- main
312
- ;;
313
- *)
314
- error "Unknown option: $1"
315
- echo "Use --help for usage information"
316
- exit 1
317
- ;;
318
- esac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
railway.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "build": {
3
  "builder": "DOCKERFILE",
4
- "dockerfilePath": "Dockerfile"
5
  },
6
  "deploy": {
7
  "numReplicas": 1,
 
1
  {
2
  "build": {
3
  "builder": "DOCKERFILE",
4
+ "dockerfilePath": "Dockerfile.railway"
5
  },
6
  "deploy": {
7
  "numReplicas": 1,
requirements-railway.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.104.0,<0.105.0
2
+ uvicorn[standard]>=0.24.0,<0.25.0
3
+ python-multipart>=0.0.7
4
+ pydantic>=2.5.0,<3.0.0
5
+ PyMuPDF>=1.23.0,<2.0.0
6
+ pdfminer.six>=20221105
7
+ beautifulsoup4>=4.12.0,<5.0.0
8
+ qdrant-client>=1.7.0,<2.0.0
9
+ langchain>=0.0.350,<1.0.0
10
+ google-generativeai>=0.3.0,<1.0.0
11
+ fastapi-users[sqlalchemy]>=12.1.0,<13.0.0
12
+ passlib[bcrypt]>=1.7.0,<2.0.0
13
+ bcrypt>=4.0.0,<5.0.0
14
+ python-jose[cryptography]>=3.3.0,<4.0.0
15
+ pydantic-settings>=2.1.0,<3.0.0
16
+ sqlalchemy>=2.0.0,<3.0.0
17
+ alembic>=1.13.0,<2.0.0
18
+ aiosqlite>=0.19.0,<1.0.0
19
+ python-docx>=1.1.0,<2.0.0
20
+ psutil>=5.9.0,<6.0.0