First draft coding project
Browse files- .env +120 -0
- README.md +470 -12
- app.py +808 -0
- configs.py +372 -0
- demo.py +527 -0
- gettingstart.md +485 -0
- manage_services.py +550 -0
- requirements.txt +45 -0
- setup.py +511 -0
- test.py +1055 -0
.env
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =================================================================
|
| 2 |
+
# OCR SERVICE CONFIGURATION
|
| 3 |
+
# =================================================================
|
| 4 |
+
# Get these from your Azure Portal -> Document Intelligence resource
|
| 5 |
+
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://ocrservice256807.cognitiveservices.azure.com/
|
| 6 |
+
# SECURITY: a live key was committed here — rotate it in the Azure Portal immediately
# and keep .env out of version control (add it to .gitignore; commit a .env.example instead).
AZURE_DOCUMENT_INTELLIGENCE_KEY=your-document-intelligence-key
|
| 7 |
+
|
| 8 |
+
# Server Configuration (Optional)
|
| 9 |
+
OCR_HOST=0.0.0.0
|
| 10 |
+
OCR_PORT=8400
|
| 11 |
+
OCR_DEBUG=True
|
| 12 |
+
OCR_LOG_LEVEL=INFO
|
| 13 |
+
|
| 14 |
+
# # CORS Configuration (Optional - for production)
|
| 15 |
+
# ALLOWED_ORIGINS=["http://localhost:3000", "https://yourdomain.com"]
|
| 16 |
+
|
| 17 |
+
# Rate Limiting (Optional - for production)
|
| 18 |
+
RATE_LIMIT_REQUESTS=100
|
| 19 |
+
RATE_LIMIT_WINDOW=3600
|
| 20 |
+
|
| 21 |
+
# Web Scraping Configuration (Optional)
|
| 22 |
+
MAX_IMAGES_PER_PAGE=10
|
| 23 |
+
REQUEST_TIMEOUT=30
|
| 24 |
+
USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
| 25 |
+
|
| 26 |
+
# ================================================================
|
| 27 |
+
# RAG SERVICE CONFIGURATION
|
| 28 |
+
# ================================================================
|
| 29 |
+
|
| 30 |
+
AZURE_OPENAI_ENDPOINT=https://ai-models-service256807.cognitiveservices.azure.com/
|
| 31 |
+
# SECURITY: a live key was committed here — rotate it immediately and never commit real secrets.
AZURE_OPENAI_KEY=your-azure-openai-key
|
| 32 |
+
AZURE_OPENAI_DEPLOYMENT=text-embedding-3-small
|
| 33 |
+
AZURE_OPENAI_API_VERSION=2024-12-01-preview
|
| 34 |
+
|
| 35 |
+
PG_HOST=sbaipocpostgresql.postgres.database.azure.com
|
| 36 |
+
PG_PORT=5432
|
| 37 |
+
PG_DATABASE=vectorsearch
|
| 38 |
+
PG_USER=user
|
| 39 |
+
# SECURITY: a real database password was committed here — change it and load it from a secrets store.
PG_PASSWORD="your-postgres-password"
|
| 40 |
+
PG_SSL_MODE=require
|
| 41 |
+
|
| 42 |
+
OCR_SERVICE_URL=http://localhost:8400
|
| 43 |
+
|
| 44 |
+
RAG_HOST=0.0.0.0
|
| 45 |
+
RAG_PORT=8401
|
| 46 |
+
RAG_DEBUG=True
|
| 47 |
+
RAG_LOG_LEVEL=INFO
|
| 48 |
+
|
| 49 |
+
CHUNK_SIZE=1536
|
| 50 |
+
CHUNK_OVERLAP=100
|
| 51 |
+
MIN_CHUNK_SIZE=200
|
| 52 |
+
|
| 53 |
+
ALLOWED_ORIGINS=*
|
| 54 |
+
|
| 55 |
+
DEFAULT_SEARCH_LIMIT=10
|
| 56 |
+
DEFAULT_SIMILARITY_THRESHOLD=0.5
|
| 57 |
+
MAX_SEARCH_RESULTS=100
|
| 58 |
+
|
| 59 |
+
# Database connection pooling
|
| 60 |
+
DB_POOL_MIN_SIZE=2
|
| 61 |
+
DB_POOL_MAX_SIZE=20
|
| 62 |
+
DB_COMMAND_TIMEOUT=60
|
| 63 |
+
|
| 64 |
+
# Request timeouts (seconds)
|
| 65 |
+
# NOTE: REQUEST_TIMEOUT is defined three times in this file (OCR, RAG, and NER sections)
# with different values; most dotenv loaders keep only the LAST occurrence. Consider
# namespacing per service (e.g. RAG_REQUEST_TIMEOUT) to avoid silent overrides.
REQUEST_TIMEOUT=30
|
| 66 |
+
EMBEDDING_TIMEOUT=60
|
| 67 |
+
|
| 68 |
+
SERVICE_VERSION=1.0.0
|
| 69 |
+
RAG_SERVICE_URL=http://localhost:8401
|
| 70 |
+
TEST_TIMEOUT=30
|
| 71 |
+
|
| 72 |
+
# =================================================================
|
| 73 |
+
# NER SERVICE CONFIGURATION
|
| 74 |
+
# =================================================================
|
| 75 |
+
|
| 76 |
+
# Server Configuration
|
| 77 |
+
NER_HOST=0.0.0.0
|
| 78 |
+
NER_PORT=8500
|
| 79 |
+
DEBUG=True
|
| 80 |
+
NER_LOG_LEVEL=INFO
|
| 81 |
+
|
| 82 |
+
# OCR Service Configuration (from your existing OCR service)
|
| 83 |
+
OCR_SERVICE_URL=http://localhost:8400
|
| 84 |
+
|
| 85 |
+
# DeepSeek API Configuration
|
| 86 |
+
# Get these from your Azure AI service or DeepSeek API
|
| 87 |
+
DEEPSEEK_ENDPOINT=https://ai-models-service256807.services.ai.azure.com/models
|
| 88 |
+
# SECURITY: a live key was committed here — rotate it immediately.
DEEPSEEK_API_KEY=your-deepseek-api-key
|
| 89 |
+
DEEPSEEK_MODEL=DeepSeek-R1-0528
|
| 90 |
+
|
| 91 |
+
# Azure OpenAI Configuration (for embeddings)
|
| 92 |
+
# Get these from your Azure OpenAI resource
|
| 93 |
+
# WARNING: AZURE_OPENAI_ENDPOINT is also set in the RAG section above with a DIFFERENT value;
# most dotenv loaders keep only this later value, silently overriding the RAG setting.
# Rename one of them (e.g. NER_AZURE_OPENAI_ENDPOINT) to keep both configurations usable.
AZURE_OPENAI_ENDPOINT=https://openaiservice2568.openai.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2023-05-15
|
| 94 |
+
# SECURITY: a live key was committed here — rotate it immediately.
AZURE_OPENAI_API_KEY=your-azure-openai-api-key
|
| 95 |
+
EMBEDDING_MODEL=text-embedding-3-large
|
| 96 |
+
|
| 97 |
+
# Azure Storage Configuration (SAS Authentication)
|
| 98 |
+
# Option 1: Use Storage Account URL + SAS Token (Recommended)
|
| 99 |
+
AZURE_STORAGE_ACCOUNT_URL=https://historylog256807.blob.core.windows.net/
|
| 100 |
+
# SECURITY: a live SAS token (valid until 2026) was committed here — revoke/regenerate it
# in the Azure Portal and never commit real tokens.
AZURE_BLOB_SAS_TOKEN="your-blob-sas-token"
|
| 101 |
+
|
| 102 |
+
# Option 2: Use complete SAS URL (Alternative - leave blank if using Option 1)
|
| 103 |
+
#AZURE_BLOB_SAS_URL=https://historylog256807.blob.core.windows.net/historylog?your-sas-query-string
|
| 104 |
+
|
| 105 |
+
BLOB_CONTAINER=historylog
|
| 106 |
+
|
| 107 |
+
# PostgreSQL Configuration (Azure Database for PostgreSQL flexible server)
|
| 108 |
+
POSTGRES_HOST=sbaipocpostgresql.postgres.database.azure.com
|
| 109 |
+
POSTGRES_PORT=5432
|
| 110 |
+
POSTGRES_USER=user
|
| 111 |
+
# SECURITY: a real database password was committed here — change it and load it from a secrets store.
POSTGRES_PASSWORD="your-postgres-password"
|
| 112 |
+
POSTGRES_DATABASE=postgres
|
| 113 |
+
AZURE_OPENAI_DEPLOYMENT_NAME=text-embedding-3-large
|
| 114 |
+
|
| 115 |
+
# Processing Configuration
|
| 116 |
+
MAX_FILE_SIZE=50 # Maximum file size in MB
|
| 117 |
+
REQUEST_TIMEOUT=300 # Request timeout in seconds
|
| 118 |
+
|
| 119 |
+
# CORS Configuration (optional)
|
| 120 |
+
ALLOWED_ORIGINS=*
|
README.md
CHANGED
|
@@ -1,12 +1,470 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Unified AI Services
|
| 2 |
+
|
| 3 |
+
A comprehensive AI platform that integrates Named Entity Recognition (NER), Optical Character Recognition (OCR), and Retrieval-Augmented Generation (RAG) services into a unified application.
|
| 4 |
+
|
| 5 |
+
## π Features
|
| 6 |
+
|
| 7 |
+
### Core Services
|
| 8 |
+
- **NER Service** (Port 8500): Advanced named entity recognition with relationship extraction
|
| 9 |
+
- **OCR Service** (Port 8400): Document processing with Azure Document Intelligence
|
| 10 |
+
- **RAG Service** (Port 8401): Vector search and document retrieval
|
| 11 |
+
- **Unified App** (Port 8000): Coordinated workflows and service management
|
| 12 |
+
|
| 13 |
+
### Key Capabilities
|
| 14 |
+
- β
Multi-language support (Thai + English)
|
| 15 |
+
- β
Complex relationship extraction
|
| 16 |
+
- β
Entity deduplication
|
| 17 |
+
- β
Graph database exports (Neo4j, GraphML, GEXF)
|
| 18 |
+
- β
Vector search with semantic similarity
|
| 19 |
+
- β
Document processing (PDF, images, text)
|
| 20 |
+
- β
Real-time service health monitoring
|
| 21 |
+
- β
Unified workflows combining all services
|
| 22 |
+
- β
Comprehensive API documentation
|
| 23 |
+
|
| 24 |
+
## π Quick Start
|
| 25 |
+
|
| 26 |
+
### Prerequisites
|
| 27 |
+
- Python 3.8 or higher
|
| 28 |
+
- PostgreSQL with vector extension support
|
| 29 |
+
- Azure OpenAI account
|
| 30 |
+
- Azure Document Intelligence account
|
| 31 |
+
- DeepSeek API account (for advanced NER)
|
| 32 |
+
|
| 33 |
+
### Automated Setup
|
| 34 |
+
|
| 35 |
+
1. **Clone and navigate to the project directory**
|
| 36 |
+
```bash
|
| 37 |
+
cd unified-ai-services
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
2. **Run the automated setup**
|
| 41 |
+
```bash
|
| 42 |
+
python setup.py
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
This will:
|
| 46 |
+
- Check your Python environment
|
| 47 |
+
- Create necessary directories
|
| 48 |
+
- Help you configure .env file
|
| 49 |
+
- Install dependencies
|
| 50 |
+
- Validate configuration
|
| 51 |
+
- Create startup scripts
|
| 52 |
+
|
| 53 |
+
3. **Start the unified application**
|
| 54 |
+
```bash
|
| 55 |
+
python app.py
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
Or use the generated scripts:
|
| 59 |
+
- Windows: `start_services.bat`
|
| 60 |
+
- Unix/Linux/Mac: `./start_services.sh`
|
| 61 |
+
|
| 62 |
+
4. **Run comprehensive tests**
|
| 63 |
+
```bash
|
| 64 |
+
python test.py
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
Or use the generated scripts:
|
| 68 |
+
- Windows: `run_tests.bat`
|
| 69 |
+
- Unix/Linux/Mac: `./run_tests.sh`
|
| 70 |
+
|
| 71 |
+
### Manual Setup
|
| 72 |
+
|
| 73 |
+
If you prefer manual setup:
|
| 74 |
+
|
| 75 |
+
1. **Install dependencies**
|
| 76 |
+
```bash
|
| 77 |
+
pip install -r requirements.txt
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
2. **Create .env file** (copy from .env.example)
|
| 81 |
+
```bash
|
| 82 |
+
cp .env.example .env
|
| 83 |
+
# Edit .env with your configuration
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
3. **Set up directories**
|
| 87 |
+
```bash
|
| 88 |
+
mkdir -p services exports logs temp tests data
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
4. **Place service files in the services directory**
|
| 92 |
+
```
|
| 93 |
+
services/
|
| 94 |
+
βββ ner_service.py
|
| 95 |
+
βββ ocr_service.py
|
| 96 |
+
βββ rag_service.py
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
## π Project Structure
|
| 100 |
+
|
| 101 |
+
```
|
| 102 |
+
unified-ai-services/
|
| 103 |
+
βββ app.py # Main unified application
|
| 104 |
+
βββ configs.py # Centralized configuration
|
| 105 |
+
βββ setup.py # Automated setup script
|
| 106 |
+
βββ requirements.txt # Python dependencies
|
| 107 |
+
βββ test.py # Comprehensive test suite
|
| 108 |
+
βββ .env # Environment configuration
|
| 109 |
+
βββ services/ # Individual service files
|
| 110 |
+
β βββ ner_service.py # NER service implementation
|
| 111 |
+
β βββ ocr_service.py # OCR service implementation
|
| 112 |
+
β βββ rag_service.py # RAG service implementation
|
| 113 |
+
βββ exports/ # Generated export files
|
| 114 |
+
βββ logs/ # Application logs
|
| 115 |
+
βββ temp/ # Temporary files
|
| 116 |
+
βββ tests/ # Additional test files
|
| 117 |
+
βββ data/ # Data files
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
## βοΈ Configuration
|
| 121 |
+
|
| 122 |
+
### Environment Variables
|
| 123 |
+
|
| 124 |
+
The system uses a `.env` file for configuration. Key variables include:
|
| 125 |
+
|
| 126 |
+
#### Server Configuration
|
| 127 |
+
```bash
|
| 128 |
+
HOST=0.0.0.0
|
| 129 |
+
DEBUG=True
|
| 130 |
+
MAIN_PORT=8000
|
| 131 |
+
NER_PORT=8500
|
| 132 |
+
OCR_PORT=8400
|
| 133 |
+
RAG_PORT=8401
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
#### Database Configuration
|
| 137 |
+
```bash
|
| 138 |
+
POSTGRES_HOST=your-postgres-server.com
|
| 139 |
+
POSTGRES_PORT=5432
|
| 140 |
+
POSTGRES_USER=your-username
|
| 141 |
+
POSTGRES_PASSWORD=your-password
|
| 142 |
+
POSTGRES_DATABASE=postgres
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
#### Azure OpenAI Configuration
|
| 146 |
+
```bash
|
| 147 |
+
AZURE_OPENAI_ENDPOINT=https://your-openai.openai.azure.com/
|
| 148 |
+
AZURE_OPENAI_API_KEY=your-api-key
|
| 149 |
+
EMBEDDING_MODEL=text-embedding-3-large
|
| 150 |
+
```
|
| 151 |
+
|
| 152 |
+
#### DeepSeek Configuration
|
| 153 |
+
```bash
|
| 154 |
+
DEEPSEEK_ENDPOINT=https://your-deepseek-endpoint/
|
| 155 |
+
DEEPSEEK_API_KEY=your-deepseek-key
|
| 156 |
+
DEEPSEEK_MODEL=DeepSeek-R1-0528
|
| 157 |
+
```
|
| 158 |
+
|
| 159 |
+
#### Azure Document Intelligence Configuration
|
| 160 |
+
```bash
|
| 161 |
+
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://your-di.cognitiveservices.azure.com/
|
| 162 |
+
AZURE_DOCUMENT_INTELLIGENCE_KEY=your-di-key
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
#### Azure Storage Configuration
|
| 166 |
+
```bash
|
| 167 |
+
AZURE_STORAGE_ACCOUNT_URL=https://yourstorage.blob.core.windows.net/
|
| 168 |
+
AZURE_BLOB_SAS_TOKEN=your-sas-token
|
| 169 |
+
BLOB_CONTAINER=historylog
|
| 170 |
+
```
|
| 171 |
+
|
| 172 |
+
## π§ API Documentation
|
| 173 |
+
|
| 174 |
+
Once running, access the interactive API documentation:
|
| 175 |
+
- **Unified API**: http://localhost:8000/docs
|
| 176 |
+
- **NER Service**: http://localhost:8500/docs
|
| 177 |
+
- **OCR Service**: http://localhost:8400/docs
|
| 178 |
+
- **RAG Service**: http://localhost:8401/docs
|
| 179 |
+
|
| 180 |
+
## π― API Usage Examples
|
| 181 |
+
|
| 182 |
+
### 1. Unified Analysis (Text + RAG Indexing)
|
| 183 |
+
|
| 184 |
+
```python
|
| 185 |
+
import httpx
|
| 186 |
+
|
| 187 |
+
async def unified_analysis():
|
| 188 |
+
data = {
|
| 189 |
+
"text": "Your text content here...",
|
| 190 |
+
"extract_relationships": True,
|
| 191 |
+
"include_embeddings": False,
|
| 192 |
+
"generate_graph_files": True,
|
| 193 |
+
"export_formats": ["neo4j", "json"],
|
| 194 |
+
"enable_rag_indexing": True,
|
| 195 |
+
"rag_title": "My Document",
|
| 196 |
+
"rag_keywords": ["keyword1", "keyword2"]
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
async with httpx.AsyncClient() as client:
|
| 200 |
+
response = await client.post("http://localhost:8000/analyze/unified", json=data)
|
| 201 |
+
return response.json()
|
| 202 |
+
```
|
| 203 |
+
|
| 204 |
+
### 2. Combined Search with NER Analysis
|
| 205 |
+
|
| 206 |
+
```python
|
| 207 |
+
async def combined_search():
|
| 208 |
+
data = {
|
| 209 |
+
"query": "search query here",
|
| 210 |
+
"limit": 10,
|
| 211 |
+
"similarity_threshold": 0.2,
|
| 212 |
+
"include_ner_analysis": True
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
async with httpx.AsyncClient() as client:
|
| 216 |
+
response = await client.post("http://localhost:8000/search/combined", json=data)
|
| 217 |
+
return response.json()
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
### 3. File Upload Analysis
|
| 221 |
+
|
| 222 |
+
```python
|
| 223 |
+
async def analyze_file():
|
| 224 |
+
files = {"file": ("document.pdf", open("document.pdf", "rb"), "application/pdf")}
|
| 225 |
+
data = {
|
| 226 |
+
"extract_relationships": "true",
|
| 227 |
+
"generate_graph_files": "true",
|
| 228 |
+
"export_formats": "neo4j,json"
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
async with httpx.AsyncClient() as client:
|
| 232 |
+
response = await client.post("http://localhost:8000/ner/analyze/file", files=files, data=data)
|
| 233 |
+
return response.json()
|
| 234 |
+
```
|
| 235 |
+
|
| 236 |
+
## π§ͺ Testing
|
| 237 |
+
|
| 238 |
+
### Comprehensive Test Suite
|
| 239 |
+
|
| 240 |
+
The project includes comprehensive tests covering:
|
| 241 |
+
- β
Service health checks
|
| 242 |
+
- β
Individual service functionality
|
| 243 |
+
- β
Unified workflow testing
|
| 244 |
+
- β
Service proxy functionality
|
| 245 |
+
- β
Error handling and resilience
|
| 246 |
+
- β
Performance testing
|
| 247 |
+
- β
File upload/download testing
|
| 248 |
+
|
| 249 |
+
Run tests with:
|
| 250 |
+
```bash
|
| 251 |
+
python test.py
|
| 252 |
+
```
|
| 253 |
+
|
| 254 |
+
### Individual Service Tests
|
| 255 |
+
|
| 256 |
+
Test individual services:
|
| 257 |
+
```bash
|
| 258 |
+
# Test NER service
|
| 259 |
+
python test_ner.py
|
| 260 |
+
|
| 261 |
+
# Test RAG service
|
| 262 |
+
python test_rag.py
|
| 263 |
+
```
|
| 264 |
+
|
| 265 |
+
### Quick Health Check
|
| 266 |
+
|
| 267 |
+
```bash
|
| 268 |
+
curl http://localhost:8000/health
|
| 269 |
+
```
|
| 270 |
+
|
| 271 |
+
## π Monitoring and Health Checks
|
| 272 |
+
|
| 273 |
+
### Health Endpoints
|
| 274 |
+
- **Unified System**: `GET /health`
|
| 275 |
+
- **Individual Services**: `GET /ner/health`, `GET /ocr/health`, `GET /rag/health`
|
| 276 |
+
- **Detailed Status**: `GET /status`
|
| 277 |
+
- **Service Discovery**: `GET /services`
|
| 278 |
+
|
| 279 |
+
### Monitoring Features
|
| 280 |
+
- Real-time service health monitoring
|
| 281 |
+
- Response time tracking
|
| 282 |
+
- Service uptime monitoring
|
| 283 |
+
- Error rate tracking
|
| 284 |
+
- Resource usage monitoring
|
| 285 |
+
|
| 286 |
+
## π Service Architecture
|
| 287 |
+
|
| 288 |
+
```mermaid
|
| 289 |
+
graph TB
|
| 290 |
+
Client[Client Applications]
|
| 291 |
+
|
| 292 |
+
subgraph "Unified AI Services (Port 8000)"
|
| 293 |
+
UA[Unified App]
|
| 294 |
+
Proxy[Service Proxies]
|
| 295 |
+
Health[Health Monitor]
|
| 296 |
+
end
|
| 297 |
+
|
| 298 |
+
subgraph "Core Services"
|
| 299 |
+
NER[NER Service<br/>Port 8500]
|
| 300 |
+
OCR[OCR Service<br/>Port 8400]
|
| 301 |
+
RAG[RAG Service<br/>Port 8401]
|
| 302 |
+
end
|
| 303 |
+
|
| 304 |
+
subgraph "External Services"
|
| 305 |
+
Azure[Azure Services]
|
| 306 |
+
DeepSeek[DeepSeek API]
|
| 307 |
+
DB[(PostgreSQL)]
|
| 308 |
+
end
|
| 309 |
+
|
| 310 |
+
Client --> UA
|
| 311 |
+
UA --> Proxy
|
| 312 |
+
Proxy --> NER
|
| 313 |
+
Proxy --> OCR
|
| 314 |
+
Proxy --> RAG
|
| 315 |
+
|
| 316 |
+
NER --> Azure
|
| 317 |
+
NER --> DeepSeek
|
| 318 |
+
NER --> DB
|
| 319 |
+
|
| 320 |
+
OCR --> Azure
|
| 321 |
+
|
| 322 |
+
RAG --> Azure
|
| 323 |
+
RAG --> DB
|
| 324 |
+
RAG --> OCR
|
| 325 |
+
```
|
| 326 |
+
|
| 327 |
+
## π οΈ Development
|
| 328 |
+
|
| 329 |
+
### Adding New Features
|
| 330 |
+
|
| 331 |
+
1. **Service Modifications**: Update individual service files in `services/`
|
| 332 |
+
2. **Unified Workflows**: Modify `app.py` for new combined workflows
|
| 333 |
+
3. **Configuration**: Update `configs.py` for new settings
|
| 334 |
+
4. **Tests**: Add tests to `test_unified.py`
|
| 335 |
+
|
| 336 |
+
### Debugging
|
| 337 |
+
|
| 338 |
+
1. **Check Service Logs**: Services log to console
|
| 339 |
+
2. **Health Checks**: Use `/health` endpoints
|
| 340 |
+
3. **Configuration**: Run `python configs.py` to validate
|
| 341 |
+
4. **Database**: Check PostgreSQL connectivity
|
| 342 |
+
5. **Azure Services**: Verify API keys and endpoints
|
| 343 |
+
|
| 344 |
+
### Service Management
|
| 345 |
+
|
| 346 |
+
Start individual services for development:
|
| 347 |
+
```bash
|
| 348 |
+
# Start NER service only
|
| 349 |
+
cd services && python ner_service.py
|
| 350 |
+
|
| 351 |
+
# Start OCR service only
|
| 352 |
+
cd services && python ocr_service.py
|
| 353 |
+
|
| 354 |
+
# Start RAG service only
|
| 355 |
+
cd services && python rag_service.py
|
| 356 |
+
```
|
| 357 |
+
|
| 358 |
+
## π¨ Troubleshooting
|
| 359 |
+
|
| 360 |
+
### Common Issues
|
| 361 |
+
|
| 362 |
+
#### 1. Services Won't Start
|
| 363 |
+
- Check port availability: `netstat -an | grep :8000`
|
| 364 |
+
- Verify Python dependencies: `pip list`
|
| 365 |
+
- Check .env configuration: `python configs.py`
|
| 366 |
+
|
| 367 |
+
#### 2. Database Connection Issues
|
| 368 |
+
- Verify PostgreSQL is running
|
| 369 |
+
- Check connection string in .env
|
| 370 |
+
- Test connectivity: `python -c "import asyncio, asyncpg; asyncio.run(asyncpg.connect('your-connection-string'))"`
|
| 371 |
+
|
| 372 |
+
#### 3. Azure Service Issues
|
| 373 |
+
- Verify API keys and endpoints
|
| 374 |
+
- Check Azure service status
|
| 375 |
+
- Review rate limits and quotas
|
| 376 |
+
|
| 377 |
+
#### 4. Performance Issues
|
| 378 |
+
- Monitor resource usage: `top` or Task Manager
|
| 379 |
+
- Check database performance
|
| 380 |
+
- Review log files for errors
|
| 381 |
+
|
| 382 |
+
### Error Codes
|
| 383 |
+
|
| 384 |
+
- **500**: Internal service error
|
| 385 |
+
- **503**: Service unavailable
|
| 386 |
+
- **400**: Bad request (check input data)
|
| 387 |
+
- **422**: Validation error
|
| 388 |
+
- **404**: Endpoint not found
|
| 389 |
+
|
| 390 |
+
## π Performance Optimization
|
| 391 |
+
|
| 392 |
+
### Recommended Settings
|
| 393 |
+
|
| 394 |
+
#### Production Configuration
|
| 395 |
+
```bash
|
| 396 |
+
DEBUG=False
|
| 397 |
+
MAX_FILE_SIZE=50
|
| 398 |
+
REQUEST_TIMEOUT=300
|
| 399 |
+
CHUNK_SIZE=1000
|
| 400 |
+
CHUNK_OVERLAP=200
|
| 401 |
+
```
|
| 402 |
+
|
| 403 |
+
#### Database Optimization
|
| 404 |
+
- Use connection pooling
|
| 405 |
+
- Configure appropriate indexes
|
| 406 |
+
- Monitor query performance
|
| 407 |
+
- Regular maintenance
|
| 408 |
+
|
| 409 |
+
#### Service Optimization
|
| 410 |
+
- Enable caching where appropriate
|
| 411 |
+
- Use async operations
|
| 412 |
+
- Optimize batch processing
|
| 413 |
+
- Monitor memory usage
|
| 414 |
+
|
| 415 |
+
## π Security Considerations
|
| 416 |
+
|
| 417 |
+
### API Security
|
| 418 |
+
- Implement authentication/authorization as needed
|
| 419 |
+
- Use HTTPS in production
|
| 420 |
+
- Validate all input data
|
| 421 |
+
- Rate limiting
|
| 422 |
+
|
| 423 |
+
### Data Security
|
| 424 |
+
- Secure database connections (SSL)
|
| 425 |
+
- Encrypt sensitive data
|
| 426 |
+
- Regular security updates
|
| 427 |
+
- Monitor access logs
|
| 428 |
+
|
| 429 |
+
### Azure Security
|
| 430 |
+
- Rotate API keys regularly
|
| 431 |
+
- Use managed identities where possible
|
| 432 |
+
- Monitor usage and costs
|
| 433 |
+
- Follow Azure security best practices
|
| 434 |
+
|
| 435 |
+
## π License
|
| 436 |
+
|
| 437 |
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
| 438 |
+
|
| 439 |
+
## π€ Contributing
|
| 440 |
+
|
| 441 |
+
1. Fork the repository
|
| 442 |
+
2. Create a feature branch
|
| 443 |
+
3. Make your changes
|
| 444 |
+
4. Add tests for new functionality
|
| 445 |
+
5. Run the test suite
|
| 446 |
+
6. Submit a pull request
|
| 447 |
+
|
| 448 |
+
## π Support
|
| 449 |
+
|
| 450 |
+
For support and questions:
|
| 451 |
+
1. Check this README for common issues
|
| 452 |
+
2. Review the test suite for usage examples
|
| 453 |
+
3. Check service logs for error details
|
| 454 |
+
4. Verify configuration with `python configs.py`
|
| 455 |
+
|
| 456 |
+
## π― Roadmap
|
| 457 |
+
|
| 458 |
+
### Current Version (1.0.0)
|
| 459 |
+
- β
Unified service integration
|
| 460 |
+
- β
Comprehensive testing
|
| 461 |
+
- β
Multi-language support
|
| 462 |
+
- β
Graph database exports
|
| 463 |
+
|
| 464 |
+
### Future Enhancements
|
| 465 |
+
- π Advanced caching mechanisms
|
| 466 |
+
- π Enhanced monitoring and analytics
|
| 467 |
+
- π Additional export formats
|
| 468 |
+
- π Improved error recovery
|
| 469 |
+
- π Performance optimizations
|
| 470 |
+
- π Additional language support
|
app.py
ADDED
|
@@ -0,0 +1,808 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Unified AI Services Application
|
| 4 |
+
Coordinates NER, OCR, and RAG services with combined workflows
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import subprocess
|
| 9 |
+
import signal
|
| 10 |
+
import sys
|
| 11 |
+
import os
|
| 12 |
+
import time
|
| 13 |
+
import json
|
| 14 |
+
import logging
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from typing import Dict, List, Optional, Any, Union
|
| 17 |
+
from contextlib import asynccontextmanager
|
| 18 |
+
from datetime import datetime
|
| 19 |
+
import tempfile
|
| 20 |
+
import io
|
| 21 |
+
|
| 22 |
+
import httpx
|
| 23 |
+
import uvicorn
|
| 24 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException, Form, BackgroundTasks, Query
|
| 25 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 26 |
+
from fastapi.responses import FileResponse, StreamingResponse
|
| 27 |
+
from pydantic import BaseModel, HttpUrl
|
| 28 |
+
import psutil
|
| 29 |
+
|
| 30 |
+
# Import our configuration
|
| 31 |
+
from configs import get_config, validate_environment
|
| 32 |
+
|
| 33 |
+
# ---------------------------------------------------------------------------
# Module-level setup: configuration, logging, and managed-service state.
# ---------------------------------------------------------------------------

# Load the centralized configuration object (defined in configs.py).
config = get_config()

# Configure root logging once at import time; every module logger inherits it.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Registry of child service processes launched by this app, keyed by service
# name, plus the most recent health-check verdict for each service.
service_processes: Dict[str, subprocess.Popen] = {}
service_health: Dict[str, bool] = {}
|
| 46 |
+
|
| 47 |
+
# Pydantic Models for Unified API
class ServiceStatus(BaseModel):
    """Snapshot of one managed service's health and runtime state."""

    name: str  # service identifier, e.g. "ner", "ocr", "rag"
    status: str  # "running" or "down"
    port: int  # local TCP port the service listens on
    health: bool  # result of the most recent /health probe
    uptime: Optional[float] = None  # seconds since process start, if known
    response_time: Optional[float] = None  # health-probe latency in seconds
|
| 55 |
+
|
| 56 |
+
class UnifiedAnalysisRequest(BaseModel):
    """Input for /analyze/unified: NER analysis plus optional RAG indexing.

    Exactly one of `text` or `url` should be supplied (the endpoint rejects
    requests with neither).
    """

    text: Optional[str] = None  # raw text to analyze
    url: Optional[HttpUrl] = None  # alternatively, a URL to fetch and analyze
    extract_relationships: bool = True
    include_embeddings: bool = True
    include_summary: bool = True
    generate_graph_files: bool = True
    export_formats: List[str] = ["neo4j", "json", "graphml"]
    # When True, the analyzed content is also uploaded to the RAG service.
    enable_rag_indexing: bool = False
    rag_title: Optional[str] = None  # defaults to "NER Analysis <id>" if unset
    rag_keywords: Optional[List[str]] = None  # defaults to NER-derived keywords
    rag_metadata: Optional[Dict[str, Any]] = None  # merged with NER stats
|
| 68 |
+
|
| 69 |
+
class CombinedSearchRequest(BaseModel):
    """Input for /search/combined: RAG search with optional NER on top hits."""

    query: str  # free-text search query
    limit: int = 10  # max RAG results to return
    similarity_threshold: float = 0.2  # minimum vector-similarity score
    include_ner_analysis: bool = True  # run NER over the top results
    ner_export_formats: List[str] = ["json"]  # export formats for that NER pass
|
| 75 |
+
|
| 76 |
+
class UnifiedResponse(BaseModel):
    """Common response envelope for the unified endpoints.

    Only the fields relevant to a given workflow are populated; the rest
    stay None. `service_calls` records which backend calls were made.
    """

    success: bool
    service_calls: List[str]  # e.g. ["ner_text", "rag_upload"]
    ner_analysis: Optional[Dict[str, Any]] = None  # raw NER service payload
    rag_document: Optional[Dict[str, Any]] = None  # raw RAG indexing payload
    search_results: Optional[Dict[str, Any]] = None  # RAG search + NER overlays
    processing_time: float  # wall-clock seconds for the whole workflow
    error: Optional[str] = None  # set when success is False
|
| 84 |
+
|
| 85 |
+
# Service Management Functions
async def start_service(service_name: str, script_path: str, port: int) -> bool:
    """Launch one service as a child process and wait until it reports healthy.

    Returns True when the service answers its /health endpoint within 30
    seconds, or when the port is already occupied (in which case an existing
    instance is assumed to be serving it); False on timeout or launch failure.
    """
    try:
        logger.info(f"π Starting {service_name} service on port {port}")

        # An occupied port is treated as "already running", not as an error.
        if is_port_in_use(port):
            logger.warning(f"Port {port} is already in use. Assuming {service_name} is already running.")
            return True

        # Detach the child into its own process group so shutdown can signal
        # the whole group (CTRL_BREAK on Windows, SIGTERM elsewhere).
        command = [sys.executable, script_path]
        if sys.platform == "win32":
            child = subprocess.Popen(command, creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
        else:
            child = subprocess.Popen(command, preexec_fn=os.setsid)

        service_processes[service_name] = child

        # Poll the health endpoint once per second for up to 30 seconds.
        for _ in range(30):
            await asyncio.sleep(1)
            if await check_service_health(service_name, port):
                logger.info(f"β {service_name} service started successfully")
                service_health[service_name] = True
                return True

        logger.error(f"β {service_name} service failed to start within timeout")
        return False

    except Exception as e:
        logger.error(f"β Failed to start {service_name} service: {e}")
        return False
|
| 122 |
+
|
| 123 |
+
def is_port_in_use(port: int) -> bool:
    """Return True if any local socket currently uses *port*.

    Fixes two defects in the original:
    - `conn.laddr` can be an empty tuple for unbound sockets, so touching
      `.port` unguarded raised AttributeError (silently masked before).
    - The bare `except:` also swallowed KeyboardInterrupt/SystemExit; it is
      narrowed to the enumeration failures psutil can actually raise
      (e.g. AccessDenied on restricted systems), which are treated as
      "port free" so startup can proceed.
    """
    try:
        for conn in psutil.net_connections():
            # Guard: skip connections with no local address.
            if conn.laddr and conn.laddr.port == port:
                return True
        return False
    except (psutil.Error, OSError) as e:
        logger.debug(f"Port scan failed while checking port {port}: {e}")
        return False
|
| 132 |
+
|
| 133 |
+
async def check_service_health(service_name: str, port: int) -> bool:
    """Return True if the service's /health endpoint answers HTTP 200 within 5s.

    `service_name` is accepted for interface symmetry; only `port` is used.
    Fix: the original bare `except:` caught BaseException, including
    asyncio.CancelledError and KeyboardInterrupt, which could wedge shutdown.
    Only transport-level failures now map to "unhealthy".
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"http://localhost:{port}/health",
                timeout=5.0
            )
            return response.status_code == 200
    except (httpx.HTTPError, OSError):
        # Connection refused, DNS failure, timeout, etc. -> not healthy.
        return False
|
| 144 |
+
|
| 145 |
+
async def get_service_status(service_name: str, port: int) -> ServiceStatus:
    """Get detailed status of a service.

    Probes the /health endpoint (recording its latency) and, for processes
    this application spawned itself, derives uptime from the OS process
    creation time. Fix: the bare `except:` around the psutil lookup is
    narrowed to the errors that lookup can raise (the process may exit
    between poll() and the psutil query).
    """
    start = time.time()
    health = await check_service_health(service_name, port)
    response_time = time.time() - start

    uptime = None
    if service_name in service_processes:
        process = service_processes[service_name]
        if process.poll() is None:  # Process is still running
            try:
                uptime = time.time() - psutil.Process(process.pid).create_time()
            except (psutil.Error, OSError):
                # Race: process exited (or is inaccessible) -> uptime unknown.
                uptime = None

    return ServiceStatus(
        name=service_name,
        status="running" if health else "down",
        port=port,
        health=health,
        uptime=uptime,
        response_time=response_time
    )
|
| 168 |
+
|
| 169 |
+
async def stop_all_services():
    """Stop every child service: graceful signal first, force-kill after 10s."""
    logger.info("π Stopping all services...")

    for name, proc in service_processes.items():
        try:
            if proc.poll() is not None:
                # Already exited; nothing to do for this one.
                continue

            logger.info(f"Stopping {name}...")

            # Signal the whole process group created in start_service.
            if sys.platform == "win32":
                proc.send_signal(signal.CTRL_BREAK_EVENT)
            else:
                os.killpg(os.getpgid(proc.pid), signal.SIGTERM)

            # Allow ten seconds for a clean exit, then force-kill.
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                logger.warning(f"Force killing {name}")
                proc.kill()

            logger.info(f"β {name} stopped")
        except Exception as exc:
            logger.error(f"Error stopping {name}: {exc}")
|
| 193 |
+
|
| 194 |
+
# Service Communication Functions
async def call_ner_service(endpoint: str, method: str = "GET", **kwargs) -> Dict[str, Any]:
    """Call a NER service endpoint and return its JSON body on HTTP 200.

    Non-200 replies are surfaced as HTTPException carrying the upstream
    status and body; transport failures become a 503.
    """
    url = f"{config.NER_SERVICE_URL}{endpoint}"
    try:
        async with httpx.AsyncClient(timeout=300.0) as client:
            resp = await client.request(method, url, **kwargs)
            if resp.status_code != 200:
                raise HTTPException(status_code=resp.status_code, detail=resp.text)
            return resp.json()
    except httpx.RequestError as exc:
        raise HTTPException(status_code=503, detail=f"NER service unavailable: {exc}")
|
| 209 |
+
|
| 210 |
+
async def call_ocr_service(endpoint: str, method: str = "GET", **kwargs) -> Dict[str, Any]:
    """Call an OCR service endpoint and return its JSON body on HTTP 200.

    Non-200 replies are surfaced as HTTPException carrying the upstream
    status and body; transport failures become a 503.
    """
    url = f"{config.OCR_SERVICE_URL}{endpoint}"
    try:
        async with httpx.AsyncClient(timeout=300.0) as client:
            resp = await client.request(method, url, **kwargs)
            if resp.status_code != 200:
                raise HTTPException(status_code=resp.status_code, detail=resp.text)
            return resp.json()
    except httpx.RequestError as exc:
        raise HTTPException(status_code=503, detail=f"OCR service unavailable: {exc}")
|
| 224 |
+
|
| 225 |
+
async def call_rag_service(endpoint: str, method: str = "GET", **kwargs) -> Dict[str, Any]:
    """Call a RAG service endpoint and return its JSON body on HTTP 200.

    Non-200 replies are surfaced as HTTPException carrying the upstream
    status and body; transport failures become a 503.
    """
    url = f"{config.RAG_SERVICE_URL}{endpoint}"
    try:
        async with httpx.AsyncClient(timeout=300.0) as client:
            resp = await client.request(method, url, **kwargs)
            if resp.status_code != 200:
                raise HTTPException(status_code=resp.status_code, detail=resp.text)
            return resp.json()
    except httpx.RequestError as exc:
        raise HTTPException(status_code=503, detail=f"RAG service unavailable: {exc}")
|
| 239 |
+
|
| 240 |
+
# Application Lifecycle
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Boot the child services before serving requests; tear them down after."""
    logger.info("π Starting Unified AI Services Application")

    # Show what we are about to run with.
    config.print_configuration_summary()

    # Refuse to boot on a broken configuration.
    if not validate_environment():
        logger.error("β Environment validation failed. Please check your configuration.")
        raise RuntimeError("Invalid environment configuration")

    # (name, script, port) for every service this coordinator owns.
    service_definitions = [
        ("ocr", "services/ocr_service.py", config.ocr.PORT),
        ("rag", "services/rag_service.py", config.rag.PORT),
        ("ner", "services/ner_service.py", config.ner.PORT)
    ]

    started_services = []
    for name, script, port in service_definitions:
        if not os.path.exists(script):
            logger.warning(f"Service script not found: {script}")
            continue
        if await start_service(name, script, port):
            started_services.append(name)
        else:
            logger.error(f"Failed to start {name} service")

    if not started_services:
        logger.error("β No services could be started")
        raise RuntimeError("Failed to start any services")

    logger.info(f"β Started {len(started_services)} services: {', '.join(started_services)}")

    # Hand control to the running application.
    yield

    # Teardown: terminate every child we spawned.
    await stop_all_services()
    logger.info("π Unified AI Services Application shutdown complete")
|
| 285 |
+
|
| 286 |
+
# FastAPI Application
app = FastAPI(
    title="Unified AI Services",
    description="Coordinated NER, OCR, and RAG services with combined workflows",
    version="1.0.0",
    lifespan=lifespan
)

# CORS configuration
# ALLOWED_ORIGINS is either the literal "*" or a JSON list of origins.
allowed_origins = config.ner.ALLOWED_ORIGINS
if allowed_origins != "*":
    try:
        allowed_origins = json.loads(allowed_origins)
    except (ValueError, TypeError):
        # FIX: narrowed from a bare `except:`. A malformed setting still
        # falls back to permissive CORS (original behavior preserved).
        allowed_origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 309 |
+
|
| 310 |
+
# Main API Endpoints
@app.get("/")
async def root():
    """Top-level directory of the unified application and its backends."""
    backend_urls = {
        "ner": f"{config.NER_SERVICE_URL}",
        "ocr": f"{config.OCR_SERVICE_URL}",
        "rag": f"{config.RAG_SERVICE_URL}"
    }
    entry_points = {
        "status": "/status",
        "analyze": "/analyze",
        "search": "/search",
        "combined": "/combined/*"
    }
    return {
        "message": "Unified AI Services",
        "version": "1.0.0",
        "services": backend_urls,
        "unified_endpoints": entry_points
    }
|
| 328 |
+
|
| 329 |
+
@app.get("/health")
|
| 330 |
+
async def unified_health():
|
| 331 |
+
"""Unified health check for all services"""
|
| 332 |
+
services = [
|
| 333 |
+
("ner", config.ner.PORT),
|
| 334 |
+
("ocr", config.ocr.PORT),
|
| 335 |
+
("rag", config.rag.PORT)
|
| 336 |
+
]
|
| 337 |
+
|
| 338 |
+
service_statuses = []
|
| 339 |
+
overall_healthy = True
|
| 340 |
+
|
| 341 |
+
for service_name, port in services:
|
| 342 |
+
status = await get_service_status(service_name, port)
|
| 343 |
+
service_statuses.append(status.dict())
|
| 344 |
+
if not status.health:
|
| 345 |
+
overall_healthy = False
|
| 346 |
+
|
| 347 |
+
return {
|
| 348 |
+
"status": "healthy" if overall_healthy else "degraded",
|
| 349 |
+
"services": service_statuses,
|
| 350 |
+
"timestamp": datetime.utcnow().isoformat(),
|
| 351 |
+
"configuration": {
|
| 352 |
+
"ner_url": config.NER_SERVICE_URL,
|
| 353 |
+
"ocr_url": config.OCR_SERVICE_URL,
|
| 354 |
+
"rag_url": config.RAG_SERVICE_URL
|
| 355 |
+
}
|
| 356 |
+
}
|
| 357 |
+
|
| 358 |
+
@app.get("/status")
|
| 359 |
+
async def detailed_status():
|
| 360 |
+
"""Detailed status of all services"""
|
| 361 |
+
services = [
|
| 362 |
+
("ner", config.ner.PORT),
|
| 363 |
+
("ocr", config.ocr.PORT),
|
| 364 |
+
("rag", config.rag.PORT)
|
| 365 |
+
]
|
| 366 |
+
|
| 367 |
+
detailed_statuses = {}
|
| 368 |
+
|
| 369 |
+
for service_name, port in services:
|
| 370 |
+
try:
|
| 371 |
+
# Get service-specific health data
|
| 372 |
+
async with httpx.AsyncClient() as client:
|
| 373 |
+
response = await client.get(f"http://localhost:{port}/health", timeout=10.0)
|
| 374 |
+
if response.status_code == 200:
|
| 375 |
+
detailed_statuses[service_name] = response.json()
|
| 376 |
+
else:
|
| 377 |
+
detailed_statuses[service_name] = {"status": "error", "error": f"HTTP {response.status_code}"}
|
| 378 |
+
except Exception as e:
|
| 379 |
+
detailed_statuses[service_name] = {"status": "unreachable", "error": str(e)}
|
| 380 |
+
|
| 381 |
+
return {
|
| 382 |
+
"unified_app": {
|
| 383 |
+
"status": "running",
|
| 384 |
+
"port": config.MAIN_PORT,
|
| 385 |
+
"uptime": time.time() - start_time if 'start_time' in globals() else 0
|
| 386 |
+
},
|
| 387 |
+
"services": detailed_statuses,
|
| 388 |
+
"configuration_valid": validate_environment()
|
| 389 |
+
}
|
| 390 |
+
|
| 391 |
+
# Unified Analysis Endpoints
@app.post("/analyze/unified")
async def unified_analysis(request: UnifiedAnalysisRequest):
    """Unified analysis combining NER and optional RAG indexing.

    Workflow: (1) run NER on the supplied text or URL; (2) if
    `enable_rag_indexing` and the NER call succeeded, also index the same
    content in the RAG service. RAG failures are logged and ignored so the
    NER result is still returned. All other errors (including the 400 for
    missing text/url) are caught by the outer handler and reported as a
    success=False UnifiedResponse rather than an HTTP error.
    """
    start_time = time.time()
    service_calls = []

    try:
        # Step 1: NER Analysis -- build the payload from the request fields.
        ner_data = {
            "text": request.text,
            "url": str(request.url) if request.url else None,
            "extract_relationships": request.extract_relationships,
            "include_embeddings": request.include_embeddings,
            "include_summary": request.include_summary,
            "generate_graph_files": request.generate_graph_files,
            "export_formats": request.export_formats
        }

        # Remove None values so the NER service only sees set fields.
        ner_data = {k: v for k, v in ner_data.items() if v is not None}

        # Text takes precedence over URL when both are supplied.
        if request.text:
            ner_result = await call_ner_service("/analyze/text", "POST", json=ner_data)
            service_calls.append("ner_text")
        elif request.url:
            ner_result = await call_ner_service("/analyze/url", "POST", json=ner_data)
            service_calls.append("ner_url")
        else:
            raise HTTPException(status_code=400, detail="Either text or url must be provided")

        # Step 2: Optional RAG indexing (best-effort).
        rag_result = None
        if request.enable_rag_indexing and ner_result.get("success"):
            try:
                # Title/keywords fall back to values derived from the NER result.
                rag_data = {
                    "title": request.rag_title or f"NER Analysis {ner_result.get('analysis_id', 'unknown')}",
                    "keywords": request.rag_keywords or ner_result.get("keywords", []),
                    "metadata": {
                        **(request.rag_metadata or {}),
                        "ner_analysis_id": ner_result.get("analysis_id"),
                        "entity_count": len(ner_result.get("entities", [])),
                        "relationship_count": len(ner_result.get("relationships", []))
                    }
                }

                if request.text:
                    # The RAG upload endpoint expects a file, so spool the text
                    # to a temp file and stream it as multipart form data.
                    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
                        f.write(request.text)
                        temp_path = f.name

                    try:
                        with open(temp_path, 'rb') as f:
                            files = {"file": ("ner_analysis.txt", f, "text/plain")}
                            form_data = {
                                "title": rag_data["title"],
                                "keywords": json.dumps(rag_data["keywords"]),
                                "metadata": json.dumps(rag_data["metadata"])
                            }

                            async with httpx.AsyncClient(timeout=300.0) as client:
                                response = await client.post(
                                    f"{config.RAG_SERVICE_URL}/documents/upload",
                                    files=files,
                                    data=form_data
                                )
                                if response.status_code == 200:
                                    rag_result = response.json()
                                    service_calls.append("rag_upload")
                    finally:
                        # Always remove the temp file, even on upload failure.
                        os.unlink(temp_path)

                elif request.url:
                    # URL-sourced content: let the RAG service fetch it itself.
                    async with httpx.AsyncClient(timeout=300.0) as client:
                        response = await client.post(
                            f"{config.RAG_SERVICE_URL}/documents/url",
                            json={
                                "url": str(request.url),
                                **rag_data,
                                "extract_images": True
                            }
                        )
                        if response.status_code == 200:
                            rag_result = response.json()
                            service_calls.append("rag_url")

            except Exception as e:
                logger.warning(f"RAG indexing failed: {e}")
                # Continue without RAG result

        processing_time = time.time() - start_time

        return UnifiedResponse(
            success=True,
            service_calls=service_calls,
            ner_analysis=ner_result,
            rag_document=rag_result,
            processing_time=processing_time
        )

    except Exception as e:
        processing_time = time.time() - start_time
        logger.error(f"Unified analysis failed: {e}")

        return UnifiedResponse(
            success=False,
            service_calls=service_calls,
            processing_time=processing_time,
            error=str(e)
        )
|
| 502 |
+
|
| 503 |
+
@app.post("/search/combined")
|
| 504 |
+
async def combined_search(request: CombinedSearchRequest):
|
| 505 |
+
"""Combined search using RAG with optional NER analysis of results"""
|
| 506 |
+
start_time = time.time()
|
| 507 |
+
service_calls = []
|
| 508 |
+
|
| 509 |
+
try:
|
| 510 |
+
# Step 1: RAG Search
|
| 511 |
+
search_data = {
|
| 512 |
+
"query": request.query,
|
| 513 |
+
"limit": request.limit,
|
| 514 |
+
"similarity_threshold": request.similarity_threshold
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
search_result = await call_rag_service("/search", "POST", json=search_data)
|
| 518 |
+
service_calls.append("rag_search")
|
| 519 |
+
|
| 520 |
+
# Step 2: Optional NER analysis of search results
|
| 521 |
+
ner_results = []
|
| 522 |
+
if request.include_ner_analysis and search_result.get("results"):
|
| 523 |
+
for i, result in enumerate(search_result["results"][:3]): # Analyze top 3 results
|
| 524 |
+
chunk_content = result.get("chunk", {}).get("content", "")
|
| 525 |
+
if chunk_content:
|
| 526 |
+
try:
|
| 527 |
+
ner_data = {
|
| 528 |
+
"text": chunk_content,
|
| 529 |
+
"extract_relationships": True,
|
| 530 |
+
"include_embeddings": False,
|
| 531 |
+
"include_summary": False,
|
| 532 |
+
"generate_graph_files": False,
|
| 533 |
+
"export_formats": request.ner_export_formats
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
+
ner_result = await call_ner_service("/analyze/text", "POST", json=ner_data)
|
| 537 |
+
ner_results.append({
|
| 538 |
+
"result_index": i,
|
| 539 |
+
"ner_analysis": ner_result
|
| 540 |
+
})
|
| 541 |
+
service_calls.append(f"ner_text_{i}")
|
| 542 |
+
|
| 543 |
+
except Exception as e:
|
| 544 |
+
logger.warning(f"NER analysis failed for result {i}: {e}")
|
| 545 |
+
|
| 546 |
+
processing_time = time.time() - start_time
|
| 547 |
+
|
| 548 |
+
return UnifiedResponse(
|
| 549 |
+
success=True,
|
| 550 |
+
service_calls=service_calls,
|
| 551 |
+
search_results={
|
| 552 |
+
**search_result,
|
| 553 |
+
"ner_analyses": ner_results
|
| 554 |
+
},
|
| 555 |
+
processing_time=processing_time
|
| 556 |
+
)
|
| 557 |
+
|
| 558 |
+
except Exception as e:
|
| 559 |
+
processing_time = time.time() - start_time
|
| 560 |
+
logger.error(f"Combined search failed: {e}")
|
| 561 |
+
|
| 562 |
+
return UnifiedResponse(
|
| 563 |
+
success=False,
|
| 564 |
+
service_calls=service_calls,
|
| 565 |
+
processing_time=processing_time,
|
| 566 |
+
error=str(e)
|
| 567 |
+
)
|
| 568 |
+
|
| 569 |
+
# Service Proxy Endpoints
@app.api_route("/ner/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def ner_proxy(path: str, request: Request):
    """Proxy requests to the NER service.

    FIX: `request` was un-annotated, so FastAPI treated it as a required
    query parameter instead of injecting the incoming Request object; the
    proxy could never work. Annotating it as `Request` restores the
    intended behavior (requires `Request` in the fastapi import).
    """
    try:
        async with httpx.AsyncClient(timeout=300.0) as client:
            url = f"{config.NER_SERVICE_URL}/{path}"

            # Forward the request
            if request.method == "GET":
                response = await client.get(url, params=request.query_params)
            else:
                content_type = request.headers.get("content-type", "")

                if "multipart/form-data" in content_type:
                    # File uploads: re-pack form fields and uploaded files.
                    form = await request.form()
                    files = {}
                    data = {}

                    for key, value in form.items():
                        if hasattr(value, 'read'):  # UploadFile-like object
                            files[key] = (value.filename, await value.read(), value.content_type)
                        else:
                            data[key] = value

                    response = await client.request(request.method, url, files=files, data=data)
                else:
                    # JSON/other content: forward the raw body; drop the Host
                    # header so the upstream sees its own host.
                    body = await request.body()
                    response = await client.request(
                        request.method,
                        url,
                        content=body,
                        headers={k: v for k, v in request.headers.items() if k.lower() != "host"}
                    )

            # Return parsed JSON when the upstream says so, raw text otherwise.
            return response.json() if response.headers.get("content-type", "").startswith("application/json") else response.text

    except httpx.RequestError as e:
        raise HTTPException(status_code=503, detail=f"NER service unavailable: {e}")
|
| 612 |
+
|
| 613 |
+
@app.api_route("/ocr/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
|
| 614 |
+
async def ocr_proxy(path: str, request):
|
| 615 |
+
"""Proxy requests to OCR service"""
|
| 616 |
+
try:
|
| 617 |
+
async with httpx.AsyncClient(timeout=300.0) as client:
|
| 618 |
+
url = f"{config.OCR_SERVICE_URL}/{path}"
|
| 619 |
+
|
| 620 |
+
# Forward the request
|
| 621 |
+
if request.method == "GET":
|
| 622 |
+
response = await client.get(url, params=request.query_params)
|
| 623 |
+
else:
|
| 624 |
+
# Handle different content types
|
| 625 |
+
content_type = request.headers.get("content-type", "")
|
| 626 |
+
|
| 627 |
+
if "multipart/form-data" in content_type:
|
| 628 |
+
# Handle file uploads
|
| 629 |
+
form = await request.form()
|
| 630 |
+
files = {}
|
| 631 |
+
data = {}
|
| 632 |
+
|
| 633 |
+
for key, value in form.items():
|
| 634 |
+
if hasattr(value, 'read'): # File-like object
|
| 635 |
+
files[key] = (value.filename, await value.read(), value.content_type)
|
| 636 |
+
else:
|
| 637 |
+
data[key] = value
|
| 638 |
+
|
| 639 |
+
response = await client.request(request.method, url, files=files, data=data)
|
| 640 |
+
else:
|
| 641 |
+
# Handle JSON/other content
|
| 642 |
+
body = await request.body()
|
| 643 |
+
response = await client.request(
|
| 644 |
+
request.method,
|
| 645 |
+
url,
|
| 646 |
+
content=body,
|
| 647 |
+
headers={k: v for k, v in request.headers.items() if k.lower() != "host"}
|
| 648 |
+
)
|
| 649 |
+
|
| 650 |
+
# Return response
|
| 651 |
+
return response.json() if response.headers.get("content-type", "").startswith("application/json") else response.text
|
| 652 |
+
|
| 653 |
+
except httpx.RequestError as e:
|
| 654 |
+
raise HTTPException(status_code=503, detail=f"OCR service unavailable: {e}")
|
| 655 |
+
|
| 656 |
+
@app.api_route("/rag/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
|
| 657 |
+
async def rag_proxy(path: str, request):
|
| 658 |
+
"""Proxy requests to RAG service"""
|
| 659 |
+
try:
|
| 660 |
+
async with httpx.AsyncClient(timeout=300.0) as client:
|
| 661 |
+
url = f"{config.RAG_SERVICE_URL}/{path}"
|
| 662 |
+
|
| 663 |
+
# Forward the request
|
| 664 |
+
if request.method == "GET":
|
| 665 |
+
response = await client.get(url, params=request.query_params)
|
| 666 |
+
else:
|
| 667 |
+
# Handle different content types
|
| 668 |
+
content_type = request.headers.get("content-type", "")
|
| 669 |
+
|
| 670 |
+
if "multipart/form-data" in content_type:
|
| 671 |
+
# Handle file uploads
|
| 672 |
+
form = await request.form()
|
| 673 |
+
files = {}
|
| 674 |
+
data = {}
|
| 675 |
+
|
| 676 |
+
for key, value in form.items():
|
| 677 |
+
if hasattr(value, 'read'): # File-like object
|
| 678 |
+
files[key] = (value.filename, await value.read(), value.content_type)
|
| 679 |
+
else:
|
| 680 |
+
data[key] = value
|
| 681 |
+
|
| 682 |
+
response = await client.request(request.method, url, files=files, data=data)
|
| 683 |
+
else:
|
| 684 |
+
# Handle JSON/other content
|
| 685 |
+
body = await request.body()
|
| 686 |
+
response = await client.request(
|
| 687 |
+
request.method,
|
| 688 |
+
url,
|
| 689 |
+
content=body,
|
| 690 |
+
headers={k: v for k, v in request.headers.items() if k.lower() != "host"}
|
| 691 |
+
)
|
| 692 |
+
|
| 693 |
+
# Return response
|
| 694 |
+
return response.json() if response.headers.get("content-type", "").startswith("application/json") else response.text
|
| 695 |
+
|
| 696 |
+
except httpx.RequestError as e:
|
| 697 |
+
raise HTTPException(status_code=503, detail=f"RAG service unavailable: {e}")
|
| 698 |
+
|
| 699 |
+
# Convenience endpoints (direct service access)
@app.get("/analyze/text")
@app.post("/analyze/text")
async def analyze_text_direct(request: Request = None):
    """Direct access to NER text analysis.

    FIX: `request` was un-annotated with a None default, so FastAPI treated
    it as an optional query parameter and the POST body was never read.
    With the `Request` annotation FastAPI injects the request object; GET
    calls still receive the usage hint, POST bodies are forwarded to the
    NER service verbatim.
    """
    if request is not None and request.method == "POST":
        return await call_ner_service("/analyze/text", "POST", json=await request.json())
    else:
        return {"message": "Use POST method with text data"}
|
| 708 |
+
|
| 709 |
+
@app.get("/documents")
|
| 710 |
+
async def list_documents():
|
| 711 |
+
"""Direct access to RAG document listing"""
|
| 712 |
+
return await call_rag_service("/documents", "GET")
|
| 713 |
+
|
| 714 |
+
@app.post("/search")
|
| 715 |
+
async def search_direct(request):
|
| 716 |
+
"""Direct access to RAG search"""
|
| 717 |
+
return await call_rag_service("/search", "POST", json=await request.json())
|
| 718 |
+
|
| 719 |
+
# Utility endpoints
@app.get("/services")
async def list_services():
    """List all available services and their endpoints."""
    ner_info = {
        "url": config.NER_SERVICE_URL,
        "description": "Named Entity Recognition with relationship extraction",
        "endpoints": [
            "/analyze/text", "/analyze/file", "/analyze/url", "/analyze/multi",
            "/download/{analysis_id}/{file_type}", "/statistics", "/entity-types", "/relationship-types"
        ]
    }
    ocr_info = {
        "url": config.OCR_SERVICE_URL,
        "description": "Optical Character Recognition with document processing",
        "endpoints": [
            "/ocr/upload", "/ocr/url", "/ocr/analyze"
        ]
    }
    rag_info = {
        "url": config.RAG_SERVICE_URL,
        "description": "Retrieval-Augmented Generation with vector search",
        "endpoints": [
            "/documents/upload", "/documents/url", "/search", "/documents", "/documents/{id}"
        ]
    }
    unified_info = {
        "url": f"http://localhost:{config.MAIN_PORT}",
        "description": "Unified interface for combined workflows",
        "endpoints": [
            "/analyze/unified", "/search/combined", "/ner/*", "/ocr/*", "/rag/*"
        ]
    }
    return {
        "services": {"ner": ner_info, "ocr": ocr_info, "rag": rag_info},
        "unified": unified_info
    }
|
| 756 |
+
|
| 757 |
+
# Signal handlers for graceful shutdown
def signal_handler(signum, frame):
    """Handle shutdown signals by scheduling service teardown.

    FIX: the original called asyncio.create_task unconditionally, which
    raises RuntimeError when no event loop is running (a signal can arrive
    before uvicorn starts or after it stops). We now schedule on the
    running loop when one exists and fall back to a blocking run otherwise.
    """
    logger.info(f"Received signal {signum}, initiating graceful shutdown...")
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop: perform the shutdown synchronously.
        asyncio.run(stop_all_services())
    else:
        loop.create_task(stop_all_services())

# Register signal handlers
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)

# Store start time for uptime calculation (read by /status).
start_time = time.time()
|
| 769 |
+
|
| 770 |
+
if __name__ == "__main__":
|
| 771 |
+
print("π Starting Unified AI Services Application")
|
| 772 |
+
print("=" * 50)
|
| 773 |
+
|
| 774 |
+
# Validate configuration before starting
|
| 775 |
+
if not validate_environment():
|
| 776 |
+
print("οΏ½οΏ½οΏ½ Configuration validation failed!")
|
| 777 |
+
print("Please check your .env file and ensure all required services are configured.")
|
| 778 |
+
sys.exit(1)
|
| 779 |
+
|
| 780 |
+
print(f"π Main application will run on: http://{config.MAIN_HOST}:{config.MAIN_PORT}")
|
| 781 |
+
print(f"π Services will be started automatically:")
|
| 782 |
+
print(f" β’ NER Service: http://localhost:{config.ner.PORT}")
|
| 783 |
+
print(f" β’ OCR Service: http://localhost:{config.ocr.PORT}")
|
| 784 |
+
print(f" β’ RAG Service: http://localhost:{config.rag.PORT}")
|
| 785 |
+
print("")
|
| 786 |
+
print("π― Available endpoints:")
|
| 787 |
+
print(" β’ Main API: /")
|
| 788 |
+
print(" β’ Health Check: /health")
|
| 789 |
+
print(" β’ Unified Analysis: /analyze/unified")
|
| 790 |
+
print(" β’ Combined Search: /search/combined")
|
| 791 |
+
print(" β’ Service Proxies: /ner/*, /ocr/*, /rag/*")
|
| 792 |
+
print("")
|
| 793 |
+
print("π API Documentation: /docs")
|
| 794 |
+
print("")
|
| 795 |
+
|
| 796 |
+
try:
|
| 797 |
+
uvicorn.run(
|
| 798 |
+
"app:app",
|
| 799 |
+
host=config.MAIN_HOST,
|
| 800 |
+
port=config.MAIN_PORT,
|
| 801 |
+
reload=config.ner.DEBUG,
|
| 802 |
+
log_level="info"
|
| 803 |
+
)
|
| 804 |
+
except KeyboardInterrupt:
|
| 805 |
+
print("\nπ Shutting down gracefully...")
|
| 806 |
+
finally:
|
| 807 |
+
# Cleanup will be handled by the lifespan context manager
|
| 808 |
+
pass
|
configs.py
ADDED
|
@@ -0,0 +1,372 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Centralized Configuration Management for Unified AI Services
|
| 4 |
+
Manages configuration for NER, OCR, and RAG services
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import logging
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Optional, Dict, Any, List
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
# Load environment variables from the .env file sitting next to this module,
# falling back to python-dotenv's default search (CWD upwards) if absent.
env_path = Path(__file__).parent / '.env'
if env_path.exists():
    load_dotenv(dotenv_path=env_path)
else:
    load_dotenv()  # Load from default location

# Setup logging. Fall back to INFO when LOG_LEVEL holds a value that is not a
# valid logging level name — the unguarded getattr(logging, ...) previously
# raised AttributeError at import time and crashed every service that
# imported this module.
logging.basicConfig(
    level=getattr(logging, os.getenv("LOG_LEVEL", "INFO").upper(), logging.INFO),
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
|
| 26 |
+
|
| 27 |
+
class BaseConfig:
    """Shared configuration for all services.

    Reads server, PostgreSQL, Azure OpenAI, Azure Storage, processing and
    CORS settings from environment variables, applying sensible defaults
    when a variable is unset.
    """

    def __init__(self):
        env = os.getenv  # local alias; every setting below comes from the environment

        # Server
        self.HOST = env("HOST", "0.0.0.0")
        self.DEBUG = env("DEBUG", "False").lower() == "true"

        # PostgreSQL (shared by NER and RAG)
        self.POSTGRES_HOST = env("POSTGRES_HOST", "")
        self.POSTGRES_PORT = int(env("POSTGRES_PORT", "5432"))
        self.POSTGRES_USER = env("POSTGRES_USER", "")
        self.POSTGRES_PASSWORD = env("POSTGRES_PASSWORD", "")
        self.POSTGRES_DATABASE = env("POSTGRES_DATABASE", "postgres")

        # Azure OpenAI (shared by NER and RAG)
        self.AZURE_OPENAI_ENDPOINT = env("AZURE_OPENAI_ENDPOINT", "")
        self.AZURE_OPENAI_API_KEY = env("AZURE_OPENAI_API_KEY", "")
        self.EMBEDDING_MODEL = env("EMBEDDING_MODEL", "text-embedding-3-large")
        self.AZURE_OPENAI_DEPLOYMENT_NAME = env("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-3-large")

        # Azure Storage (shared by NER and RAG)
        self.AZURE_STORAGE_ACCOUNT_URL = env("AZURE_STORAGE_ACCOUNT_URL", "")
        self.AZURE_BLOB_SAS_TOKEN = env("AZURE_BLOB_SAS_TOKEN", "")
        self.BLOB_CONTAINER = env("BLOB_CONTAINER", "historylog")

        # Processing limits. MAX_FILE_SIZE is expressed in MB in the
        # environment and stored here in bytes.
        self.MAX_FILE_SIZE = int(env("MAX_FILE_SIZE", "50")) * 1024 * 1024
        self.REQUEST_TIMEOUT = int(env("REQUEST_TIMEOUT", "300"))

        # CORS (a single string; "*" allows all origins)
        self.ALLOWED_ORIGINS = env("ALLOWED_ORIGINS", "*")

    def validate_azure_openai(self) -> bool:
        """Return True when the Azure OpenAI endpoint and key are both set
        and are not the placeholder values from the sample .env."""
        if not self.AZURE_OPENAI_ENDPOINT or not self.AZURE_OPENAI_API_KEY:
            return False
        if self.AZURE_OPENAI_ENDPOINT == "YOUR_AZURE_OPENAI_ENDPOINT":
            return False
        return self.AZURE_OPENAI_API_KEY != "YOUR_AZURE_OPENAI_KEY"

    def validate_postgres(self) -> bool:
        """Return True when every required PostgreSQL credential is non-empty."""
        required = (
            self.POSTGRES_HOST,
            self.POSTGRES_USER,
            self.POSTGRES_PASSWORD,
            self.POSTGRES_DATABASE,
        )
        return all(required)

    def validate_azure_storage(self) -> bool:
        """Return True when both the storage account URL and SAS token are set."""
        return bool(self.AZURE_STORAGE_ACCOUNT_URL and self.AZURE_BLOB_SAS_TOKEN)
|
| 84 |
+
|
| 85 |
+
class NERConfig(BaseConfig):
    """Configuration for NER Service.

    Extends BaseConfig with DeepSeek credentials, the OCR-service URL used
    for text extraction, NER input limits, and the closed sets of entity
    and relationship labels used by the extractor.

    NOTE(review): the Thai string literals below were copied from a
    mis-decoded (mojibake) view of this file; verify them against the
    original UTF-8 source before relying on them.
    """

    def __init__(self):
        super().__init__()
        # Port the NER HTTP service listens on.
        self.PORT = int(os.getenv("NER_PORT", "8500"))

        # DeepSeek Configuration (LLM used for extraction)
        self.DEEPSEEK_ENDPOINT = os.getenv("DEEPSEEK_ENDPOINT", "")
        self.DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
        self.DEEPSEEK_MODEL = os.getenv("DEEPSEEK_MODEL", "DeepSeek-R1-0528")

        # OCR Service Configuration — companion service used to turn
        # images/PDFs into text before NER runs.
        self.OCR_SERVICE_URL = os.getenv("OCR_SERVICE_URL", "http://localhost:8400")

        # NER Specific Settings
        self.MAX_TEXT_LENGTH = 100000  # 100KB
        # Extensions routed straight to text extraction vs. to the OCR service.
        self.SUPPORTED_TEXT_FORMATS = {'.txt', '.doc', '.docx', '.rtf'}
        self.SUPPORTED_OCR_FORMATS = {'.pdf', '.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif'}

        # Entity and Relationship Types (closed label sets handed to the LLM)
        self.ENTITY_TYPES = [
            "PERSON", "ORGANIZATION", "LOCATION", "DATE", "TIME", "MONEY", "PRODUCT", "EVENT",
            "VEHICLE", "SUSPICIOUS_OBJECT", "ILLEGAL_ACTIVITY", "EVIDENCE", "ILLEGAL_ITEM",
            "WEAPON", "DRUG", "CHEMICAL", "DOCUMENT", "PHONE_NUMBER", "ADDRESS", "EMAIL"
        ]

        # NOTE(review): one Thai item appears twice in this list (once in the
        # law-enforcement group and once near the end of that group) — likely
        # an accidental duplicate; confirm and deduplicate in the original.
        self.RELATIONSHIP_TYPES = [
            # Standard relationships
            "works_for", "founded", "located_in", "part_of", "associated_with", "owns", "manages",
            "leads", "reports_to", "collaborates_with", "partners_with", "supplies_to", "acquires",
            "invests_in", "headquartered_in", "operates_in", "born_in", "lives_in", "studied_at",
            "graduated_from", "worked_at", "visited", "attended", "participated_in", "sponsored",
            "developed", "created", "invented", "discovered", "published", "authored", "edited",
            # Thai relationships
            "ΰΈΰΈ³ΰΈΰΈ²ΰΈΰΈΰΈ΅ΰΉ", "ΰΈΰΉΰΈΰΈΰΈ±ΰΉΰΈ", "ΰΈΰΈ±ΰΉΰΈΰΈΰΈ’ΰΈΉΰΉΰΈΰΈ΅ΰΉ", "ΰΉΰΈΰΉΰΈΰΈͺΰΉΰΈ§ΰΈΰΈ«ΰΈΰΈΆΰΉΰΈΰΈΰΈΰΈ", "ΰΉΰΈΰΈ΅ΰΉΰΈ’ΰΈ§ΰΈΰΉΰΈΰΈΰΈΰΈ±ΰΈ", "ΰΉΰΈΰΉΰΈΰΉΰΈΰΉΰΈ²ΰΈΰΈΰΈ", "ΰΈΰΈ±ΰΈΰΈΰΈ²ΰΈ£",
            "ΰΈΰΈ³ΰΉΰΈΰΈ’", "ΰΈ£ΰΈ²ΰΈ’ΰΈΰΈ²ΰΈΰΈΰΉΰΈ", "ΰΈ£ΰΉΰΈ§ΰΈ‘ΰΈΰΈ²ΰΈΰΈΰΈ±ΰΈ", "ΰΉΰΈΰΉΰΈΰΈΰΈ±ΰΈΰΈΰΈ‘ΰΈ΄ΰΈΰΈ£ΰΈΰΈ±ΰΈ", "ΰΈΰΈ±ΰΈΰΈ«ΰΈ²ΰΉΰΈ«ΰΉ", "ΰΈΰΈ·ΰΉΰΈΰΈΰΈ΄ΰΈΰΈΰΈ²ΰΈ£", "ΰΈ₯ΰΈΰΈΰΈΈΰΈΰΉΰΈ",
            "ΰΈͺΰΈ³ΰΈΰΈ±ΰΈΰΈΰΈ²ΰΈΰΉΰΈ«ΰΈΰΉΰΈΰΈ΅ΰΉ", "ΰΈΰΈ³ΰΉΰΈΰΈ΄ΰΈΰΈΰΈ²ΰΈ£ΰΉΰΈ", "ΰΉΰΈΰΈ΄ΰΈΰΈΰΈ΅ΰΉ", "ΰΈΰΈ²ΰΈ¨ΰΈ±ΰΈ’ΰΈΰΈ’ΰΈΉΰΉΰΈΰΈ΅ΰΉ", "ΰΈ¨ΰΈΆΰΈΰΈ©ΰΈ²ΰΈΰΈ΅ΰΉ", "ΰΈΰΈΰΈΰΈ²ΰΈ£ΰΈ¨ΰΈΆΰΈΰΈ©ΰΈ²ΰΈΰΈ²ΰΈ",
            # Law enforcement relationships
            "arrested_by", "investigated_by", "confiscated_from", "used_in", "evidence_of", "witness_of",
            "victim_of", "suspect_in", "charged_with", "convicted_of", "sentenced_by", "defended_by",
            "prosecuted_by", "testified_against", "alibi_for", "found_at", "seized_from", "linked_to",
            "ΰΈΰΈ±ΰΈΰΈΰΈΈΰΈ‘ΰΉΰΈΰΈ’", "ΰΈͺΰΈΰΈΰΈͺΰΈ§ΰΈΰΉΰΈΰΈ’", "ΰΈ’ΰΈΆΰΈΰΈΰΈ²ΰΈ", "ΰΉΰΈΰΉΰΉΰΈΰΈΰΈ²ΰΈ£", "ΰΈ«ΰΈ₯ΰΈ±ΰΈΰΈΰΈ²ΰΈΰΈΰΈΰΈ", "ΰΈΰΈ’ΰΈ²ΰΈΰΉΰΈ", "ΰΉΰΈ«ΰΈ’ΰΈ·ΰΉΰΈΰΈΰΈΰΈ",
            "ΰΈΰΈΉΰΉΰΈΰΉΰΈΰΈΰΈͺΰΈΰΈͺΰΈ±ΰΈ’ΰΉΰΈ", "ΰΈΰΈΉΰΈΰΈΰΈ±ΰΉΰΈΰΈΰΉΰΈΰΈ«ΰΈ²", "ΰΈΰΈΉΰΈΰΈΰΈ±ΰΈΰΈͺΰΈ΄ΰΈ", "ΰΈΰΈΉΰΈΰΈΰΈ΄ΰΈΰΈ²ΰΈΰΈ©ΰΈ²ΰΉΰΈΰΈ’", "ΰΈΰΉΰΈΰΈͺΰΈΉΰΉΰΈΰΈΰΈ΅ΰΉΰΈΰΈ’", "ΰΈΰΉΰΈΰΈΰΈ£ΰΉΰΈΰΈΰΉΰΈΰΈ’",
            "ΰΉΰΈ«ΰΉΰΈΰΈ²ΰΈ£ΰΈΰΉΰΈΰΈΰΉΰΈ²ΰΈ", "ΰΉΰΈΰΉΰΈΰΈΰΉΰΈΰΉΰΈΰΉΰΈΰΈ±ΰΈ§ΰΈͺΰΈ³ΰΈ«ΰΈ£ΰΈ±ΰΈ", "ΰΈΰΈΰΈΰΈ΅ΰΉ", "ΰΈ’ΰΈΆΰΈΰΈΰΈ²ΰΈ", "ΰΉΰΈΰΈ·ΰΉΰΈΰΈ‘ΰΉΰΈ’ΰΈΰΈΰΈ±ΰΈ",
            # Criminal relationships
            "possess_illegal", "transport_illegal", "sell_illegal", "buy_illegal", "hide_evidence",
            "plan_crime", "commit_crime", "flee_from", "escape_from", "hide_at", "meet_with",
            "communicate_with", "threaten", "blackmail", "bribe", "corrupt", "money_launder",
            "ΰΈΰΈ£ΰΈΰΈΰΈΰΈ£ΰΈΰΈΰΈΰΈΰΈΰΈΰΈ΄ΰΈΰΈΰΈΰΈ«ΰΈ‘ΰΈ²ΰΈ’", "ΰΈΰΈΰΈͺΰΉΰΈΰΈΰΈΰΈΰΈΰΈ΄ΰΈΰΈΰΈΰΈ«ΰΈ‘ΰΈ²ΰΈ’", "ΰΈΰΈ²ΰΈ’ΰΈΰΈΰΈΰΈΰΈ΄ΰΈΰΈΰΈΰΈ«ΰΈ‘ΰΈ²ΰΈ’", "ΰΈΰΈ·ΰΉΰΈΰΈΰΈΰΈΰΈΰΈ΄ΰΈΰΈΰΈΰΈ«ΰΈ‘ΰΈ²ΰΈ’",
            "ΰΈΰΉΰΈΰΈΰΈ«ΰΈ₯ΰΈ±ΰΈΰΈΰΈ²ΰΈ", "ΰΈ§ΰΈ²ΰΈΰΉΰΈΰΈΰΈΰΈ²ΰΈΰΈΰΈ²ΰΈΰΈ£ΰΈ£ΰΈ‘", "ΰΈΰΈ£ΰΈ°ΰΈΰΈ³ΰΈΰΈ²ΰΈΰΈΰΈ²ΰΈΰΈ£ΰΈ£ΰΈ‘", "ΰΈ«ΰΈ₯ΰΈΰΈ«ΰΈΰΈ΅ΰΈΰΈ²ΰΈ", "ΰΉΰΈΰΈΰΈΰΉΰΈΰΈΰΈΰΈ΅ΰΉ",
            "ΰΈΰΈΰΈΰΈ°ΰΈΰΈ±ΰΈ", "ΰΈΰΈ΄ΰΈΰΈΰΉΰΈΰΈΰΈ±ΰΈ", "ΰΈΰΉΰΈ‘ΰΈΰΈΉΰΉ", "ΰΉΰΈΰΈ₯ΰΉΰΈΰΉΰΈ‘ΰΈ₯ΰΉ", "ΰΉΰΈ«ΰΉΰΈͺΰΈ΄ΰΈΰΈΰΈ", "ΰΈΰΈΈΰΈΰΈ£ΰΈ΄ΰΈ", "ΰΈΰΈΰΈΰΉΰΈΰΈ΄ΰΈ"
        ]

    def validate_deepseek(self) -> bool:
        """Validate DeepSeek configuration.

        True only when both endpoint and key are non-empty and are not the
        placeholder values from the sample .env.
        """
        return bool(
            self.DEEPSEEK_ENDPOINT and
            self.DEEPSEEK_API_KEY and
            self.DEEPSEEK_ENDPOINT != "YOUR_DEEPSEEK_ENDPOINT" and
            self.DEEPSEEK_API_KEY != "YOUR_DEEPSEEK_API_KEY"
        )
|
| 147 |
+
|
| 148 |
+
class OCRConfig(BaseConfig):
    """Configuration for the OCR service (Azure Document Intelligence)."""

    def __init__(self):
        super().__init__()
        # Port the OCR HTTP service listens on.
        self.PORT = int(os.getenv("OCR_PORT", "8400"))

        # Azure Document Intelligence credentials.
        self.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT = os.getenv(
            "AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT", "")
        self.AZURE_DOCUMENT_INTELLIGENCE_KEY = os.getenv(
            "AZURE_DOCUMENT_INTELLIGENCE_KEY", "")

        # Web scraping settings.
        self.MAX_IMAGES_PER_PAGE = int(os.getenv("MAX_IMAGES_PER_PAGE", "10"))
        default_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )
        self.USER_AGENT = os.getenv("USER_AGENT", default_agent)

    def validate_azure_document_intelligence(self) -> bool:
        """Return True when the Document Intelligence endpoint and key are
        both set and are not the placeholder values from the sample .env."""
        endpoint = self.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT
        key = self.AZURE_DOCUMENT_INTELLIGENCE_KEY
        if not endpoint or not key:
            return False
        if endpoint == "YOUR_FORM_RECOGNIZER_ENDPOINT":
            return False
        return key != "YOUR_FORM_RECOGNIZER_KEY"
|
| 171 |
+
|
| 172 |
+
class RAGConfig(BaseConfig):
    """Configuration for the RAG (retrieval-augmented generation) service."""

    def __init__(self):
        super().__init__()
        # Port the RAG HTTP service listens on.
        self.PORT = int(os.getenv("RAG_PORT", "8401"))

        # Companion OCR service used for document text extraction.
        self.OCR_SERVICE_URL = os.getenv("OCR_SERVICE_URL", "http://localhost:8400")

        # PostgreSQL aliases: RAG shares host/credentials with the base
        # config but stores vectors in its own database ("vectorsearch"
        # by default).
        self.PG_HOST = self.POSTGRES_HOST
        self.PG_PORT = self.POSTGRES_PORT
        self.PG_DATABASE = os.getenv("PG_DATABASE", "vectorsearch")  # RAG uses different default DB
        self.PG_USER = self.POSTGRES_USER
        self.PG_PASSWORD = self.POSTGRES_PASSWORD
        self.PG_SSL_MODE = os.getenv("PG_SSL_MODE", "require")

        # Text chunking parameters used when indexing documents.
        self.CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "1000"))
        self.CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", "200"))
        self.MIN_CHUNK_SIZE = int(os.getenv("MIN_CHUNK_SIZE", "50"))

        # RAG-specific Azure OpenAI deployment settings (the embedding
        # deployment here differs from BaseConfig's default).
        self.AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT", "text-embedding-3-small")
        self.AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")
|
| 198 |
+
|
| 199 |
+
class UnifiedConfig:
    """Aggregate configuration wrapping the NER, OCR and RAG service configs.

    Also holds settings that belong to the main (gateway) application: its
    own host/port, per-service base URLs for inter-service HTTP calls, and
    health-check / routing parameters.
    """

    def __init__(self):
        # One config object per backend service.
        self.ner = NERConfig()
        self.ocr = OCRConfig()
        self.rag = RAGConfig()

        # Main app configuration
        self.MAIN_PORT = int(os.getenv("MAIN_PORT", "8000"))
        self.MAIN_HOST = os.getenv("MAIN_HOST", "0.0.0.0")

        # Service URLs (for inter-service communication). All services are
        # assumed to run on the same host — presumably a single-machine
        # deployment; confirm before distributing services.
        self.NER_SERVICE_URL = f"http://localhost:{self.ner.PORT}"
        self.OCR_SERVICE_URL = f"http://localhost:{self.ocr.PORT}"
        self.RAG_SERVICE_URL = f"http://localhost:{self.rag.PORT}"

        # Service Health Check Configuration
        # (timeout/interval presumably in seconds — confirm against callers)
        self.HEALTH_CHECK_TIMEOUT = 30
        self.HEALTH_CHECK_RETRIES = 3
        self.HEALTH_CHECK_INTERVAL = 5

        # Load balancing and routing (equal weights by default)
        self.SERVICE_WEIGHTS = {
            "ner": 1.0,
            "ocr": 1.0,
            "rag": 1.0
        }

    def validate_all(self) -> Dict[str, Dict[str, bool]]:
        """Validate all service configurations.

        Returns a nested mapping: service name -> component name -> bool.
        """
        validation_results = {
            "ner": {
                "deepseek": self.ner.validate_deepseek(),
                "azure_openai": self.ner.validate_azure_openai(),
                "postgres": self.ner.validate_postgres(),
                "azure_storage": self.ner.validate_azure_storage()
            },
            "ocr": {
                "azure_document_intelligence": self.ocr.validate_azure_document_intelligence()
            },
            "rag": {
                "azure_openai": self.rag.validate_azure_openai(),
                "postgres": self.rag.validate_postgres()
            }
        }
        return validation_results

    def get_service_config(self, service_name: str) -> Optional[BaseConfig]:
        """Get configuration for a specific service.

        *service_name* is case-insensitive ("ner", "ocr" or "rag").
        Returns None for an unknown service name.
        """
        service_configs = {
            "ner": self.ner,
            "ocr": self.ocr,
            "rag": self.rag
        }
        return service_configs.get(service_name.lower())

    def get_database_config(self) -> Dict[str, str]:
        """Get database configuration for services that need it.

        Uses NER's PostgreSQL settings plus RAG's SSL mode. Note that
        "port" is deliberately returned as a string.
        """
        return {
            "host": self.ner.POSTGRES_HOST,
            "port": str(self.ner.POSTGRES_PORT),
            "user": self.ner.POSTGRES_USER,
            "password": self.ner.POSTGRES_PASSWORD,
            "database": self.ner.POSTGRES_DATABASE,
            "ssl_mode": getattr(self.rag, 'PG_SSL_MODE', 'require')
        }

    def get_azure_openai_config(self) -> Dict[str, str]:
        """Get Azure OpenAI configuration (taken from the NER config)."""
        return {
            "endpoint": self.ner.AZURE_OPENAI_ENDPOINT,
            "api_key": self.ner.AZURE_OPENAI_API_KEY,
            "embedding_model": self.ner.EMBEDDING_MODEL,
            "deployment_name": self.ner.AZURE_OPENAI_DEPLOYMENT_NAME
        }

    def print_configuration_summary(self):
        """Print a human-readable summary of every service's configuration.

        NOTE(review): the emoji literals below were reconstructed from a
        mis-decoded view of this file — verify against the original bytes.
        """
        print("🔧 Configuration Summary")
        print("=" * 50)

        # Validate all configurations once; reused for every section below.
        validation_results = self.validate_all()

        # NER Service
        print(f"📋 NER Service (Port {self.ner.PORT}):")
        print(f"   DeepSeek: {'✅' if validation_results['ner']['deepseek'] else '❌'}")
        print(f"   Azure OpenAI: {'✅' if validation_results['ner']['azure_openai'] else '❌'}")
        print(f"   PostgreSQL: {'✅' if validation_results['ner']['postgres'] else '❌'}")
        print(f"   Azure Storage: {'✅' if validation_results['ner']['azure_storage'] else '❌'}")
        print(f"   OCR Service URL: {self.ner.OCR_SERVICE_URL}")

        # OCR Service
        print(f"\n📋 OCR Service (Port {self.ocr.PORT}):")
        print(f"   Azure Document Intelligence: {'✅' if validation_results['ocr']['azure_document_intelligence'] else '❌'}")
        print(f"   Max File Size: {self.ocr.MAX_FILE_SIZE / (1024*1024):.0f} MB")

        # RAG Service
        print(f"\n🧠 RAG Service (Port {self.rag.PORT}):")
        print(f"   Azure OpenAI: {'✅' if validation_results['rag']['azure_openai'] else '❌'}")
        print(f"   PostgreSQL: {'✅' if validation_results['rag']['postgres'] else '❌'}")
        print(f"   OCR Service URL: {self.rag.OCR_SERVICE_URL}")
        print(f"   Chunk Size: {self.rag.CHUNK_SIZE}")

        # Main App
        print(f"\n🚀 Main App (Port {self.MAIN_PORT}):")
        print(f"   NER Service: {self.NER_SERVICE_URL}")
        print(f"   OCR Service: {self.OCR_SERVICE_URL}")
        print(f"   RAG Service: {self.RAG_SERVICE_URL}")

        # Database Configuration
        print(f"\n🗄️ Database Configuration:")
        print(f"   Host: {self.ner.POSTGRES_HOST}")
        print(f"   Port: {self.ner.POSTGRES_PORT}")
        print(f"   User: {self.ner.POSTGRES_USER}")
        print(f"   NER Database: {self.ner.POSTGRES_DATABASE}")
        print(f"   RAG Database: {self.rag.PG_DATABASE}")

        # Critical Issues: flatten all component results and list failures.
        all_validations = []
        for service, validations in validation_results.items():
            all_validations.extend(validations.values())

        if not all(all_validations):
            print(f"\n⚠️ CONFIGURATION ISSUES DETECTED:")
            for service, validations in validation_results.items():
                for component, is_valid in validations.items():
                    if not is_valid:
                        print(f"   ❌ {service.upper()}: {component} not configured")
        else:
            print(f"\n✅ All configurations are valid!")
|
| 331 |
+
|
| 332 |
+
# Global configuration instance, created once at import time. All service
# modules are expected to share this singleton via get_config().
config = UnifiedConfig()

def get_config() -> UnifiedConfig:
    """Return the module-level singleton configuration instance."""
    return config
|
| 338 |
+
|
| 339 |
+
def validate_environment() -> bool:
    """Return True when every critical service component is configured.

    Critical components are Azure OpenAI and PostgreSQL for both the NER
    and RAG services, plus Azure Document Intelligence for OCR. Optional
    components (DeepSeek, Azure Storage) do not affect the result.
    """
    results = config.validate_all()

    # (service, component) pairs that must all validate for startup.
    critical = (
        ("ner", "azure_openai"),
        ("ner", "postgres"),
        ("ocr", "azure_document_intelligence"),
        ("rag", "azure_openai"),
        ("rag", "postgres"),
    )
    return all(results[service][component] for service, component in critical)
|
| 353 |
+
|
| 354 |
+
# Manual smoke test: print the full configuration summary and report
# whether the critical components pass validation.
# NOTE(review): emoji literals were reconstructed from a mis-decoded view
# of this file — verify against the original bytes.
if __name__ == "__main__":
    """Test configuration loading and validation"""
    print("🧪 Testing Configuration Loading")
    print("=" * 40)

    try:
        config.print_configuration_summary()

        if validate_environment():
            print("\n🎉 Environment validation passed!")
            print("All critical services are properly configured.")
        else:
            print("\n❌ Environment validation failed!")
            print("Some critical services are not properly configured.")
            print("Please check your .env file and update missing values.")

    except Exception as e:
        # Catch-all is acceptable here: this path only reports and logs.
        print(f"\n❌ Configuration loading failed: {e}")
        logger.error(f"Configuration error: {e}")
|
demo.py
ADDED
|
@@ -0,0 +1,527 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Unified AI Services - Interactive Demo
|
| 4 |
+
Demonstrates the capabilities of the unified system with real examples
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import httpx
|
| 9 |
+
import json
|
| 10 |
+
import time
|
| 11 |
+
import sys
|
| 12 |
+
from typing import Dict, Any, Optional
|
| 13 |
+
|
| 14 |
+
# Demo configuration
# Base URL of the unified gateway started by `python app.py`.
UNIFIED_URL = "http://localhost:8000"
# Per-request timeout (seconds) for the shared httpx client.
TIMEOUT = 60
|
| 17 |
+
|
| 18 |
+
# Demo data: three sample documents exercising Thai-only, English-only and
# mixed-language analysis paths.
# NOTE(review): the Thai text below appears mojibake in the view this file
# was reviewed from and is copied verbatim; the exact in-string indentation
# of the triple-quoted blocks could not be recovered either. Verify both
# against the original UTF-8 source.
DEMO_TEXTS = {
    # Thai-language mock crime report (Thai NER + law-enforcement relations).
    "thai_crime": """
    ΰΈΰΈΰΈ΅ΰΈΰΈ²ΰΈΰΈ²ΰΈΰΈ΅ΰΉΰΈͺΰΈ³ΰΈΰΈ±ΰΈ: ΰΈΰΈ²ΰΈ£ΰΈΰΈ²ΰΈΰΈΰΈ£ΰΈ£ΰΈ‘ΰΈΰΈ΅ΰΉΰΈΰΈ£ΰΈΈΰΈΰΉΰΈΰΈΰΈ‘ΰΈ«ΰΈ²ΰΈΰΈΰΈ£

    ΰΉΰΈ‘ΰΈ·ΰΉΰΈΰΈ§ΰΈ±ΰΈΰΈΰΈ΅ΰΉ 15 ΰΈΰΈΈΰΈ₯ΰΈ²ΰΈΰΈ‘ 2567 ΰΉΰΈ§ΰΈ₯ΰΈ² 14:30 ΰΈ.
    ΰΈΰΈ²ΰΈ’ΰΈͺΰΈ‘ΰΈΰΈ²ΰΈ’ ΰΉΰΈΰΈΰΈ΅ ΰΈΰΈ²ΰΈ’ΰΈΈ 45 ΰΈΰΈ΅ ΰΈΰΈ²ΰΈΰΈ΅ΰΈΰΈΰΈ±ΰΈΰΈΰΈΈΰΈ£ΰΈΰΈ΄ΰΈ
    ΰΈΰΈΉΰΈΰΈΰΈΰΉΰΈͺΰΈ΅ΰΈ’ΰΈΰΈ΅ΰΈ§ΰΈ΄ΰΈΰΈΰΈ΅ΰΉΰΈΰΈΰΈΰΉΰΈΰΈ‘ΰΈ΄ΰΉΰΈΰΈ΅ΰΈ’ΰΈ‘ ΰΉΰΈΰΈΰΈ° ΰΈ£ΰΈ΄ΰΉΰΈ§ΰΈΰΈ£ΰΉ ΰΈΰΈ΄ΰΈΰΈ΅ΰΉ ΰΈΰΈ±ΰΉΰΈ 25

    ΰΈΰΈΉΰΉΰΈΰΉΰΈΰΈΰΈͺΰΈΰΈͺΰΈ±ΰΈ’: ΰΈΰΈ²ΰΈΰΈͺΰΈ²ΰΈ§ΰΈ‘ΰΈΰΈ΅ ΰΈ£ΰΈ±ΰΈΰΉΰΈΰΈ΄ΰΈ ΰΈΰΈ²ΰΈ’ΰΈΈ 32 ΰΈΰΈ΅
    ΰΉΰΈΰΉΰΈΰΉΰΈ₯ΰΈΰΈ²ΰΈΰΈΈΰΈΰΈ²ΰΈ£ΰΈΰΈΰΈΰΈΰΈΉΰΉΰΉΰΈͺΰΈ΅ΰΈ’ΰΈΰΈ΅ΰΈ§ΰΈ΄ΰΈ

    ΰΈ«ΰΈ₯ΰΈ±ΰΈΰΈΰΈ²ΰΈ: ΰΈΰΈΰΈͺΰΈ²ΰΈ£ΰΈΰΈ΄ΰΈ©ΰΉΰΈΰΉΰΈΰΉΰΈ§ΰΈΰΉΰΈ³
    ΰΉΰΈΰΈ΄ΰΈΰΈΰΈ³ΰΈΰΈ§ΰΈ 500,000 ΰΈΰΈ²ΰΈ ΰΈ«ΰΈ²ΰΈ’ΰΉΰΈΰΈΰΈ²ΰΈΰΈΰΈΉΰΉΰΉΰΈΰΈ
    ΰΈΰΈ₯ΰΉΰΈΰΈΰΈ§ΰΈΰΈΰΈ£ΰΈΰΈ΄ΰΈΰΈΰΈ±ΰΈΰΈΰΈΆΰΈΰΉΰΈ«ΰΈΰΈΈΰΈΰΈ²ΰΈ£ΰΈΰΉΰΉΰΈΰΉ

    ΰΈΰΈ³ΰΈ£ΰΈ§ΰΈΰΈͺΰΈΰΈ²ΰΈΰΈ΅ΰΈΰΈΰΈΰΈ«ΰΈ₯ΰΉΰΈΰΈΰΈ³ΰΈΰΈ²ΰΈ£ΰΈͺΰΈ·ΰΈΰΈͺΰΈ§ΰΈ
    ΰΈΰΈΰΈ§ΰΉΰΈ²ΰΈΰΈΉΰΉΰΈΰΉΰΈΰΈΰΈͺΰΈΰΈͺΰΈ±ΰΈ’ΰΈ‘ΰΈ΅ΰΈ«ΰΈΰΈ΅ΰΉΰΈͺΰΈ΄ΰΈΰΈΰΈ³ΰΈΰΈ§ΰΈΰΈ‘ΰΈ²ΰΈ
    """,

    # English-language corporate/business report (English NER path).
    "english_business": """
    Corporate Investigation Report - Tech Acquisition

    On October 20, 2024, Microsoft Corporation announced the acquisition
    of AI startup InnovateTech for $2.5 billion USD.

    Key Personnel:
    - CEO Sarah Johnson of InnovateTech
    - VP Acquisitions David Chen at Microsoft
    - Investment banker Lisa Rodriguez from Goldman Sachs

    The deal includes:
    - 150 AI researchers and engineers
    - Proprietary machine learning algorithms
    - Patents portfolio worth $800 million
    - Office locations in San Francisco and Seattle

    The acquisition strengthens Microsoft's position in the AI market
    and provides access to advanced natural language processing technology.
    """,

    # Mixed Thai/English document (language-detection + mixed extraction).
    "mixed_content": """
    International Business Partnership
    ΰΈΰΈ£ΰΈ΄ΰΈ©ΰΈ±ΰΈ ΰΉΰΈΰΈ’ΰΉΰΈΰΈ ΰΈΰΈ³ΰΈΰΈ±ΰΈ (ThaiTech Ltd.)

    Partnership Agreement between:
    - ThaiTech Limited (Thailand)
    - Singapore AI Solutions Pte Ltd (Singapore)
    - Tokyo Innovation Corp (Japan)

    ΰΈΰΉΰΈΰΈΰΈΰΈ₯ΰΈΰΈΰΈ§ΰΈ²ΰΈ‘ΰΈ£ΰΉΰΈ§ΰΈ‘ΰΈ‘ΰΈ·ΰΈ:
    Investment: $10 million USD (approximately 350 million Thai Baht)
    Duration: 5 years (2024-2029)
    Focus: Artificial Intelligence and Machine Learning

    Key Locations:
    - Bangkok, Thailand (Head Office)
    - ΰΈͺΰΈ΄ΰΈΰΈΰΉΰΈΰΈ£ΰΉ (Singapore Regional Office)
    - Tokyo, Japan (R&D Center)

    Expected Revenue: $50 million USD by 2027
    """
}
|
| 81 |
+
|
| 82 |
+
class UnifiedDemo:
|
| 83 |
+
"""Interactive demo for the unified AI services"""
|
| 84 |
+
|
| 85 |
+
    def __init__(self):
        # httpx.AsyncClient, created lazily in __aenter__; the demo must be
        # used as an async context manager before making any requests.
        self.session = None
        # Per-title analysis responses, filled by demo_unified_analysis.
        self.demo_results = {}
|
| 88 |
+
|
| 89 |
+
    async def __aenter__(self):
        """Open the shared HTTP client when entering ``async with``."""
        self.session = httpx.AsyncClient(timeout=TIMEOUT)
        return self
|
| 92 |
+
|
| 93 |
+
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP client on exit; exceptions are not suppressed."""
        if self.session:
            await self.session.aclose()
|
| 96 |
+
|
| 97 |
+
def print_header(self, title: str):
|
| 98 |
+
"""Print formatted header"""
|
| 99 |
+
print("\n" + "=" * 70)
|
| 100 |
+
print(f" {title}")
|
| 101 |
+
print("=" * 70)
|
| 102 |
+
|
| 103 |
+
def print_section(self, title: str):
|
| 104 |
+
"""Print section header"""
|
| 105 |
+
print(f"\nπ {title}")
|
| 106 |
+
print("-" * 50)
|
| 107 |
+
|
| 108 |
+
    async def check_system_health(self) -> bool:
        """Check if the unified system is healthy.

        GETs {UNIFIED_URL}/health, prints per-service status, and returns
        True when at least 3 services report healthy. Connection failures
        are caught and reported as a not-ready system.

        NOTE(review): emoji literals were reconstructed from a mis-decoded
        view of this file — verify against the original bytes.
        """
        try:
            response = await self.session.get(f"{UNIFIED_URL}/health")

            if response.status_code == 200:
                data = response.json()
                status = data.get("status")
                services = data.get("services", [])

                print(f"🏥 System Health: {status}")

                for service in services:
                    health_icon = "✅" if service.get("health") else "❌"
                    print(f"   {health_icon} {service.get('name', 'unknown')}: {service.get('status', 'unknown')}")

                healthy_services = [s for s in services if s.get("health")]

                if len(healthy_services) >= 3:  # At least 3 services should be healthy
                    print("✅ System is ready for demo!")
                    return True
                else:
                    print("❌ System is not ready. Please ensure all services are running.")
                    return False
            else:
                print(f"❌ Health check failed: HTTP {response.status_code}")
                return False

        except Exception as e:
            # Most likely httpx.ConnectError when the gateway is not running.
            print(f"❌ Cannot connect to unified system: {e}")
            print("\n💡 Make sure the unified application is running:")
            print("   python app.py")
            return False
|
| 141 |
+
|
| 142 |
+
    async def demo_unified_analysis(self, text: str, title: str) -> Optional[Dict[str, Any]]:
        """Demonstrate unified analysis capabilities.

        POSTs *text* to {UNIFIED_URL}/analyze/unified with relationships,
        summary, graph-file export and RAG indexing enabled, then prints a
        breakdown of the NER entities/relationships and RAG indexing result.
        Successful responses are cached in self.demo_results[title] and
        returned; any failure is printed and None is returned.

        NOTE(review): emoji literals were reconstructed from a mis-decoded
        view of this file — verify against the original bytes.
        """
        self.print_section(f"Unified Analysis: {title}")

        try:
            print(f"🔍 Analyzing text ({len(text)} characters)...")
            print(f"   Text preview: {text[:100]}...")

            # Full-featured request: NER + relationship extraction + graph
            # export + RAG indexing, tagged with demo metadata.
            request_data = {
                "text": text,
                "extract_relationships": True,
                "include_embeddings": False,
                "include_summary": True,
                "generate_graph_files": True,
                "export_formats": ["neo4j", "json"],
                "enable_rag_indexing": True,
                "rag_title": f"Demo: {title}",
                "rag_keywords": ["demo", "analysis", "test"],
                "rag_metadata": {"demo": True, "category": title.lower()}
            }

            start_time = time.time()
            response = await self.session.post(f"{UNIFIED_URL}/analyze/unified", json=request_data)
            processing_time = time.time() - start_time

            if response.status_code == 200:
                data = response.json()

                if data.get("success"):
                    service_calls = data.get("service_calls", [])
                    ner_analysis = data.get("ner_analysis", {})
                    rag_document = data.get("rag_document", {})

                    print(f"✅ Analysis completed in {processing_time:.2f} seconds")
                    print(f"📊 Service calls: {', '.join(service_calls)}")

                    # NER Results
                    if ner_analysis:
                        entities = ner_analysis.get("entities", [])
                        relationships = ner_analysis.get("relationships", [])
                        language = ner_analysis.get("language", "unknown")

                        print(f"\n🏷️ NER Analysis Results:")
                        print(f"   Language detected: {language}")
                        print(f"   Entities found: {len(entities)}")
                        print(f"   Relationships found: {len(relationships)}")

                        # Show top entities by type: group entity texts by label.
                        entity_types = {}
                        for entity in entities:
                            entity_type = entity.get("label", "UNKNOWN")
                            if entity_type not in entity_types:
                                entity_types[entity_type] = []
                            entity_types[entity_type].append(entity.get("text", ""))

                        print(f"\n   📋 Entity breakdown:")
                        for entity_type, entity_list in sorted(entity_types.items()):
                            print(f"      {entity_type}: {len(entity_list)} entities")
                            # Show a few examples
                            examples = entity_list[:3]
                            if examples:
                                print(f"         Examples: {', '.join(examples)}")

                        # Show relationships (first three only)
                        if relationships:
                            print(f"\n   🔗 Relationship examples:")
                            for rel in relationships[:3]:
                                source = rel.get("source_entity", "Unknown")
                                target = rel.get("target_entity", "Unknown")
                                rel_type = rel.get("relationship_type", "unknown")
                                confidence = rel.get("confidence", 0)
                                print(f"      {source} → {target} ({rel_type}, {confidence:.2f})")
                        else:
                            print(f"   ⚠️ No relationships found")

                    # RAG Results
                    if rag_document:
                        print(f"\n💾 RAG Indexing Results:")
                        print(f"   Document ID: {rag_document.get('document_id', 'N/A')}")
                        print(f"   Total chunks: {rag_document.get('total_chunks', 0)}")
                        print(f"   Status: Document indexed for search")
                    else:
                        print(f"\n⚠️ RAG indexing was not performed")

                    # Store results for later use
                    self.demo_results[title] = data
                    return data
                else:
                    print(f"❌ Analysis failed: {data.get('error', 'Unknown error')}")
                    return None
            else:
                print(f"❌ Request failed: HTTP {response.status_code}")
                print(f"   Response: {response.text[:200]}")
                return None

        except Exception as e:
            # Network errors, JSON decode errors etc. all land here.
            print(f"❌ Analysis error: {e}")
            return None
|
| 240 |
+
|
| 241 |
+
async def demo_combined_search(self):
    """Run a few combined-search queries through the unified endpoint and report results."""
    self.print_section("Combined Search with NER Enhancement")

    queries = (
        "murder investigation Thailand",
        "Microsoft acquisition business",
        "artificial intelligence partnership",
    )

    for query in queries:
        try:
            print(f"\n🔍 Searching for: '{query}'")

            payload = {
                "query": query,
                "limit": 3,
                "similarity_threshold": 0.1,
                "include_ner_analysis": True,
                "ner_export_formats": ["json"],
            }

            started = time.time()
            response = await self.session.post(f"{UNIFIED_URL}/search/combined", json=payload)
            search_time = time.time() - started

            # Guard clauses: report HTTP- or application-level failure and move on.
            if response.status_code != 200:
                print(f"   ❌ Search failed: HTTP {response.status_code}")
                continue

            data = response.json()
            if not data.get("success"):
                print(f"   ❌ Search failed: {data.get('error', 'Unknown error')}")
                continue

            search_results = data.get("search_results", {})
            results = search_results.get("results", [])
            ner_analyses = search_results.get("ner_analyses", [])

            print(f"   ✅ Search completed in {search_time:.2f} seconds")
            print(f"   📊 Found {len(results)} results")

            for index, result in enumerate(results):
                chunk = result.get("chunk", {})
                similarity = result.get("similarity_score", 0)
                doc_info = result.get("document_info", {})

                print(f"\n   📄 Result {index+1} (similarity: {similarity:.3f}):")
                print(f"      Title: {doc_info.get('title', 'Untitled')}")
                print(f"      Content: {chunk.get('content', '')[:100]}...")

            if ner_analyses:
                print(f"\n   🏷️ NER analysis performed on top {len(ner_analyses)} results")
                for ner_data in ner_analyses:
                    ner_result = ner_data.get("ner_analysis", {})
                    if ner_result.get("success"):
                        entities = ner_result.get("entities", [])
                        relationships = ner_result.get("relationships", [])
                        print(f"      Result {ner_data.get('result_index', 0)}: {len(entities)} entities, {len(relationships)} relationships")

        except Exception as e:
            print(f"   ❌ Search error: {e}")
|
| 303 |
+
|
| 304 |
+
async def demo_service_proxies(self):
    """Exercise the NER, RAG and OCR proxies exposed by the unified app."""
    self.print_section("Service Proxy Demonstration")

    # --- NER proxy: push a tiny analysis request through the gateway ---
    try:
        print("🧪 Testing NER service proxy...")

        ner_request = {
            "text": "Quick test: Apple Inc. CEO Tim Cook visited Tokyo, Japan.",
            "extract_relationships": True,
            "include_embeddings": False,
            "generate_graph_files": False,
        }

        response = await self.session.post(f"{UNIFIED_URL}/ner/analyze/text", json=ner_request)

        if response.status_code == 200:
            result = response.json()
            if result.get("success"):
                entities = result.get("entities", [])
                print(f"   ✅ NER proxy working: found {len(entities)} entities")
            else:
                print(f"   ❌ NER proxy failed: {result.get('error', 'Unknown error')}")
        else:
            print(f"   ❌ NER proxy failed: HTTP {response.status_code}")
    except Exception as e:
        print(f"   ❌ NER proxy error: {e}")

    # --- RAG proxy: list a few indexed documents ---
    try:
        print("🧪 Testing RAG service proxy...")

        response = await self.session.get(f"{UNIFIED_URL}/rag/documents?limit=3")

        if response.status_code == 200:
            documents = response.json().get("documents", [])
            print(f"   ✅ RAG proxy working: found {len(documents)} documents")
        else:
            print(f"   ❌ RAG proxy failed: HTTP {response.status_code}")
    except Exception as e:
        print(f"   ❌ RAG proxy error: {e}")

    # --- OCR proxy: a health check is enough to prove routing works ---
    try:
        print("🧪 Testing OCR service proxy...")

        response = await self.session.get(f"{UNIFIED_URL}/ocr/health")

        if response.status_code == 200:
            print(f"   ✅ OCR proxy working: health check passed")
        else:
            print(f"   ❌ OCR proxy failed: HTTP {response.status_code}")
    except Exception as e:
        print(f"   ❌ OCR proxy error: {e}")
|
| 363 |
+
|
| 364 |
+
async def demo_service_discovery(self):
    """Query the /services endpoint and print what the unified app exposes."""
    self.print_section("Service Discovery")

    try:
        response = await self.session.get(f"{UNIFIED_URL}/services")

        # Bail out early on transport-level failure.
        if response.status_code != 200:
            print(f"❌ Service discovery failed: HTTP {response.status_code}")
            return

        data = response.json()
        services = data.get("services", {})
        unified = data.get("unified", {})

        print(f"🌐 Service discovery successful:")
        print(f"   Unified endpoint: {unified.get('url', 'N/A')}")

        for service_name, service_info in services.items():
            endpoints = service_info.get("endpoints", [])
            description = service_info.get("description", "No description")
            url = service_info.get("url", "N/A")

            print(f"\n   📡 {service_name.upper()} Service:")
            print(f"      URL: {url}")
            print(f"      Description: {description}")
            print(f"      Endpoints: {len(endpoints)} available")

            # Show a few example endpoints, then summarize the remainder.
            for endpoint in endpoints[:3]:
                print(f"        • {endpoint}")
            if len(endpoints) > 3:
                print(f"        • ... and {len(endpoints) - 3} more")

    except Exception as e:
        print(f"❌ Service discovery error: {e}")
|
| 399 |
+
|
| 400 |
+
def print_demo_summary(self):
    """Summarize accumulated demo results: per-document stats plus overall totals."""
    self.print_section("Demo Summary")

    if not self.demo_results:
        print("No analysis results to summarize.")
        return

    total_entities = 0
    total_relationships = 0
    languages_detected = set()

    for title, data in self.demo_results.items():
        ner_analysis = data.get("ner_analysis", {})
        if not ner_analysis:
            continue  # nothing to count for this document

        entities = ner_analysis.get("entities", [])
        relationships = ner_analysis.get("relationships", [])
        language = ner_analysis.get("language", "unknown")

        total_entities += len(entities)
        total_relationships += len(relationships)
        languages_detected.add(language)

        print(f"📄 {title}:")
        print(f"   Language: {language}")
        print(f"   Entities: {len(entities)}")
        print(f"   Relationships: {len(relationships)}")

    print(f"\n🎯 Overall Demo Statistics:")
    print(f"   Total analyses: {len(self.demo_results)}")
    print(f"   Total entities extracted: {total_entities}")
    print(f"   Total relationships found: {total_relationships}")
    print(f"   Languages detected: {', '.join(languages_detected)}")

    print(f"\n✨ Capabilities Demonstrated:")
    print(f"   ✅ Multi-language NER analysis (Thai + English)")
    print(f"   ✅ Relationship extraction and mapping")
    print(f"   ✅ RAG document indexing")
    print(f"   ✅ Combined search with NER enhancement")
    print(f"   ✅ Service proxy functionality")
    print(f"   ✅ Unified workflow coordination")
    print(f"   ✅ Real-time processing and analysis")
|
| 442 |
+
|
| 443 |
+
async def run_interactive_demo(self):
    """Drive the full demo: health check, analyses, search, proxies, discovery, summary.

    Returns True on completion, False if the health check blocks the run.
    """
    self.print_header("Unified AI Services - Interactive Demo")

    print("This demo will showcase the capabilities of the unified AI system:")
    print("• Multi-language NER analysis with relationship extraction")
    print("• RAG document indexing and vector search")
    print("• Combined workflows and service coordination")
    print("• Service proxy functionality")
    print("• Real-time health monitoring")

    # The demo is pointless against a broken system, so gate on health first.
    print("\n🔍 Checking system health...")
    if not await self.check_system_health():
        print("\n❌ Demo cannot proceed - system is not healthy")
        return False

    # Demo 1: one unified analysis per sample text, with a short pause between.
    self.print_header("Demo 1: Unified Analysis Capabilities")
    for title, text in DEMO_TEXTS.items():
        await self.demo_unified_analysis(text, title.replace("_", " ").title())
        await asyncio.sleep(1)

    # Demos 2-4 follow the same header-then-run shape, so table-drive them.
    stages = (
        ("Demo 2: Combined Search with NER Enhancement", self.demo_combined_search),
        ("Demo 3: Service Proxy Functionality", self.demo_service_proxies),
        ("Demo 4: Service Discovery", self.demo_service_discovery),
    )
    for header, stage in stages:
        self.print_header(header)
        await stage()

    # Summary
    self.print_header("Demo Complete")
    self.print_demo_summary()

    print(f"\n🎉 Demo completed successfully!")
    print(f"📚 For more information, visit: http://localhost:8000/docs")

    return True
|
| 488 |
+
|
| 489 |
+
async def main():
    """Entry point for the interactive demo.

    An optional first CLI argument overrides the module-level UNIFIED_URL
    (the demo target). After user confirmation, runs the full demo inside
    the UnifiedDemo async context and reports the outcome.
    """
    # Declare the rebinding intent up front instead of burying the `global`
    # statement inside the conditional below; also drop the redundant
    # intermediate variable the original used for the assignment.
    global UNIFIED_URL

    print("🎬 Unified AI Services - Interactive Demo")
    print("=" * 50)

    # Optional CLI override, e.g. `python demo.py http://other-host:8000`
    if len(sys.argv) > 1:
        UNIFIED_URL = sys.argv[1]

    print(f"🎯 Demo target: {UNIFIED_URL}")
    print("\nMake sure the unified application is running:")
    print("   python app.py")

    # Wait for user confirmation before hitting the services.
    try:
        input("\nPress Enter to start the demo (or Ctrl+C to cancel)...")
    except KeyboardInterrupt:
        print("\nDemo cancelled.")
        return

    async with UnifiedDemo() as demo:
        success = await demo.run_interactive_demo()

        if success:
            print(f"\n🎉 Demo completed successfully!")
            print(f"The unified AI services are working perfectly.")
        else:
            print(f"\n⚠️ Demo encountered some issues.")
            print(f"Please check the system health and try again.")
|
| 519 |
+
|
| 520 |
+
if __name__ == "__main__":
    # Top-level driver: run the async demo, exiting non-zero on unexpected
    # failure and quietly acknowledging a user interrupt.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n\n👋 Demo interrupted by user")
    except Exception as exc:
        print(f"\n❌ Demo failed: {exc}")
        sys.exit(1)
|
gettingstart.md
ADDED
|
@@ -0,0 +1,485 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Getting Started with Unified AI Services
|
| 2 |
+
|
| 3 |
+
This guide will walk you through setting up and running the complete Unified AI Services system.
|
| 4 |
+
|
| 5 |
+
## π Quick Overview
|
| 6 |
+
|
| 7 |
+
The Unified AI Services system consists of:
|
| 8 |
+
- **NER Service** (Port 8500): Named Entity Recognition with relationship extraction
|
| 9 |
+
- **OCR Service** (Port 8400): Optical Character Recognition with document processing
|
| 10 |
+
- **RAG Service** (Port 8401): Retrieval-Augmented Generation with vector search
|
| 11 |
+
- **Unified App** (Port 8000): Main application coordinating all services
|
| 12 |
+
|
| 13 |
+
## π Quick Start (Recommended)
|
| 14 |
+
|
| 15 |
+
### Step 1: Automated Setup
|
| 16 |
+
|
| 17 |
+
```bash
|
| 18 |
+
# Run the automated setup wizard
|
| 19 |
+
python setup.py
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
This will:
|
| 23 |
+
- β
Check your Python environment
|
| 24 |
+
- β
Create necessary directories
|
| 25 |
+
- β
Help configure your .env file
|
| 26 |
+
- β
Install dependencies
|
| 27 |
+
- β
Validate configuration
|
| 28 |
+
- β
Create startup scripts
|
| 29 |
+
|
| 30 |
+
### Step 2: Start the System
|
| 31 |
+
|
| 32 |
+
```bash
|
| 33 |
+
# Start all services automatically
|
| 34 |
+
python app.py
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
Or use the generated scripts:
|
| 38 |
+
- **Windows**: Double-click `start_services.bat`
|
| 39 |
+
- **Linux/Mac**: Run `./start_services.sh`
|
| 40 |
+
|
| 41 |
+
### Step 3: Test the System
|
| 42 |
+
|
| 43 |
+
```bash
|
| 44 |
+
# Run comprehensive tests
|
| 45 |
+
python test.py
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
Or use the generated scripts:
|
| 49 |
+
- **Windows**: Double-click `run_tests.bat`
|
| 50 |
+
- **Linux/Mac**: Run `./run_tests.sh`
|
| 51 |
+
|
| 52 |
+
### Step 4: Try the Demo
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
# Run interactive demo
|
| 56 |
+
python demo.py
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
## π File Structure
|
| 60 |
+
|
| 61 |
+
After setup, your directory should look like this:
|
| 62 |
+
|
| 63 |
+
```
|
| 64 |
+
unified-ai-services/
|
| 65 |
+
βββ app.py # π Main unified application
|
| 66 |
+
βββ configs.py # βοΈ Configuration management
|
| 67 |
+
βββ setup.py # π οΈ Automated setup script
|
| 68 |
+
βββ manage_services.py # π§ Service management tool
|
| 69 |
+
βββ test.py                # π§ͺ Comprehensive test suite
|
| 70 |
+
βββ demo.py # π¬ Interactive demo
|
| 71 |
+
βββ requirements.txt # π¦ Python dependencies
|
| 72 |
+
βββ .env # π Environment configuration
|
| 73 |
+
βββ README.md # π Documentation
|
| 74 |
+
βββ gettingstart.md        # π This file
|
| 75 |
+
βββ services/ # π Service implementations
|
| 76 |
+
β βββ ner_service.py # Named Entity Recognition
|
| 77 |
+
β βββ ocr_service.py # Optical Character Recognition
|
| 78 |
+
β βββ rag_service.py # Retrieval-Augmented Generation
|
| 79 |
+
βββ exports/ # π Generated export files
|
| 80 |
+
βββ logs/ # π Application logs
|
| 81 |
+
βββ temp/ # ποΈ Temporary files
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
## βοΈ Manual Setup (Alternative)
|
| 85 |
+
|
| 86 |
+
If you prefer manual setup:
|
| 87 |
+
|
| 88 |
+
### Prerequisites
|
| 89 |
+
- Python 3.8 or higher
|
| 90 |
+
- PostgreSQL with vector extension
|
| 91 |
+
- Azure OpenAI account
|
| 92 |
+
- Azure Document Intelligence account
|
| 93 |
+
- DeepSeek API account
|
| 94 |
+
|
| 95 |
+
### 1. Install Dependencies
|
| 96 |
+
|
| 97 |
+
```bash
|
| 98 |
+
pip install -r requirements.txt
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
### 2. Configure Environment
|
| 102 |
+
|
| 103 |
+
Create a `.env` file with your configuration:
|
| 104 |
+
|
| 105 |
+
```bash
|
| 106 |
+
# Server Configuration
|
| 107 |
+
HOST=0.0.0.0
|
| 108 |
+
MAIN_PORT=8000
|
| 109 |
+
NER_PORT=8500
|
| 110 |
+
OCR_PORT=8400
|
| 111 |
+
RAG_PORT=8401
|
| 112 |
+
|
| 113 |
+
# PostgreSQL Configuration
|
| 114 |
+
POSTGRES_HOST=your-postgres-server.com
|
| 115 |
+
POSTGRES_PORT=5432
|
| 116 |
+
POSTGRES_USER=your-username
|
| 117 |
+
POSTGRES_PASSWORD=your-password
|
| 118 |
+
POSTGRES_DATABASE=postgres
|
| 119 |
+
|
| 120 |
+
# Azure OpenAI Configuration
|
| 121 |
+
AZURE_OPENAI_ENDPOINT=https://your-openai.openai.azure.com/
|
| 122 |
+
AZURE_OPENAI_API_KEY=your-api-key
|
| 123 |
+
EMBEDDING_MODEL=text-embedding-3-large
|
| 124 |
+
|
| 125 |
+
# DeepSeek Configuration (for advanced NER)
|
| 126 |
+
DEEPSEEK_ENDPOINT=https://your-deepseek-endpoint/
|
| 127 |
+
DEEPSEEK_API_KEY=your-deepseek-key
|
| 128 |
+
DEEPSEEK_MODEL=DeepSeek-R1-0528
|
| 129 |
+
|
| 130 |
+
# Azure Document Intelligence Configuration
|
| 131 |
+
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://your-di.cognitiveservices.azure.com/
|
| 132 |
+
AZURE_DOCUMENT_INTELLIGENCE_KEY=your-di-key
|
| 133 |
+
|
| 134 |
+
# Azure Storage Configuration
|
| 135 |
+
AZURE_STORAGE_ACCOUNT_URL=https://yourstorage.blob.core.windows.net/
|
| 136 |
+
AZURE_BLOB_SAS_TOKEN=your-sas-token
|
| 137 |
+
BLOB_CONTAINER=historylog
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
### 3. Create Directory Structure
|
| 141 |
+
|
| 142 |
+
```bash
|
| 143 |
+
mkdir -p services exports logs temp tests data
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
### 4. Place Service Files
|
| 147 |
+
|
| 148 |
+
Ensure your service files are in the correct locations:
|
| 149 |
+
- `services/ner_service.py`
|
| 150 |
+
- `services/ocr_service.py`
|
| 151 |
+
- `services/rag_service.py`
|
| 152 |
+
|
| 153 |
+
## π§ Service Management
|
| 154 |
+
|
| 155 |
+
### Using the Service Manager
|
| 156 |
+
|
| 157 |
+
The `manage_services.py` script provides easy service management:
|
| 158 |
+
|
| 159 |
+
```bash
|
| 160 |
+
# Start individual services
|
| 161 |
+
python manage_services.py start ner
|
| 162 |
+
python manage_services.py start ocr
|
| 163 |
+
python manage_services.py start rag
|
| 164 |
+
python manage_services.py start unified
|
| 165 |
+
|
| 166 |
+
# Start all services
|
| 167 |
+
python manage_services.py start all
|
| 168 |
+
|
| 169 |
+
# Check status
|
| 170 |
+
python manage_services.py status
|
| 171 |
+
|
| 172 |
+
# Test services
|
| 173 |
+
python manage_services.py test ner
|
| 174 |
+
python manage_services.py test all
|
| 175 |
+
|
| 176 |
+
# Stop services
|
| 177 |
+
python manage_services.py stop all
|
| 178 |
+
|
| 179 |
+
# Restart services
|
| 180 |
+
python manage_services.py restart all
|
| 181 |
+
|
| 182 |
+
# List available services
|
| 183 |
+
python manage_services.py list
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
### Direct Service Management
|
| 187 |
+
|
| 188 |
+
Start services individually for development:
|
| 189 |
+
|
| 190 |
+
```bash
|
| 191 |
+
# Terminal 1: Start OCR service
|
| 192 |
+
cd services && python ocr_service.py
|
| 193 |
+
|
| 194 |
+
# Terminal 2: Start RAG service
|
| 195 |
+
cd services && python rag_service.py
|
| 196 |
+
|
| 197 |
+
# Terminal 3: Start NER service
|
| 198 |
+
cd services && python ner_service.py
|
| 199 |
+
|
| 200 |
+
# Terminal 4: Start unified application
|
| 201 |
+
python app.py
|
| 202 |
+
```
|
| 203 |
+
|
| 204 |
+
## π§ͺ Testing and Validation
|
| 205 |
+
|
| 206 |
+
### Comprehensive System Tests
|
| 207 |
+
|
| 208 |
+
```bash
|
| 209 |
+
# Run all tests
|
| 210 |
+
python test.py
|
| 211 |
+
|
| 212 |
+
# Test output will show:
|
| 213 |
+
# β
Unified App Health Check
|
| 214 |
+
# β
Individual Service Health
|
| 215 |
+
# β
Unified Analysis (Text)
|
| 216 |
+
# β
Unified Analysis (URL)
|
| 217 |
+
# β
Combined Search
|
| 218 |
+
# β
Service Proxies
|
| 219 |
+
# β
File Upload (Unified)
|
| 220 |
+
# β
Service Discovery
|
| 221 |
+
# β
System Performance
|
| 222 |
+
# β
Error Handling
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
### Individual Service Tests
|
| 226 |
+
|
| 227 |
+
```bash
|
| 228 |
+
# Test NER service specifically
|
| 229 |
+
python test_ner.py
|
| 230 |
+
|
| 231 |
+
# Test RAG service specifically
|
| 232 |
+
python test_rag.py
|
| 233 |
+
```
|
| 234 |
+
|
| 235 |
+
### Quick Health Checks
|
| 236 |
+
|
| 237 |
+
```bash
|
| 238 |
+
# Check unified system
|
| 239 |
+
curl http://localhost:8000/health
|
| 240 |
+
|
| 241 |
+
# Check individual services
|
| 242 |
+
curl http://localhost:8500/health # NER
|
| 243 |
+
curl http://localhost:8400/health # OCR
|
| 244 |
+
curl http://localhost:8401/health # RAG
|
| 245 |
+
```
|
| 246 |
+
|
| 247 |
+
## π¬ Interactive Demo
|
| 248 |
+
|
| 249 |
+
The demo script showcases all system capabilities:
|
| 250 |
+
|
| 251 |
+
```bash
|
| 252 |
+
python demo.py
|
| 253 |
+
```
|
| 254 |
+
|
| 255 |
+
Demo includes:
|
| 256 |
+
- Multi-language text analysis (Thai + English)
|
| 257 |
+
- Entity and relationship extraction
|
| 258 |
+
- RAG document indexing
|
| 259 |
+
- Combined search functionality
|
| 260 |
+
- Service proxy testing
|
| 261 |
+
- Real-time performance monitoring
|
| 262 |
+
|
| 263 |
+
## π API Usage
|
| 264 |
+
|
| 265 |
+
### API Documentation
|
| 266 |
+
|
| 267 |
+
Once running, access interactive documentation:
|
| 268 |
+
- **Unified API**: http://localhost:8000/docs
|
| 269 |
+
- **NER Service**: http://localhost:8500/docs
|
| 270 |
+
- **OCR Service**: http://localhost:8400/docs
|
| 271 |
+
- **RAG Service**: http://localhost:8401/docs
|
| 272 |
+
|
| 273 |
+
### Key Endpoints
|
| 274 |
+
|
| 275 |
+
#### Unified Analysis
|
| 276 |
+
```python
|
| 277 |
+
# Analyze text with automatic RAG indexing
|
| 278 |
+
POST /analyze/unified
|
| 279 |
+
{
|
| 280 |
+
"text": "Your text here...",
|
| 281 |
+
"extract_relationships": true,
|
| 282 |
+
"enable_rag_indexing": true,
|
| 283 |
+
"rag_title": "Document Title"
|
| 284 |
+
}
|
| 285 |
+
```
|
| 286 |
+
|
| 287 |
+
#### Combined Search
|
| 288 |
+
```python
|
| 289 |
+
# Search with automatic NER enhancement
|
| 290 |
+
POST /search/combined
|
| 291 |
+
{
|
| 292 |
+
"query": "search terms",
|
| 293 |
+
"include_ner_analysis": true,
|
| 294 |
+
"limit": 10
|
| 295 |
+
}
|
| 296 |
+
```
|
| 297 |
+
|
| 298 |
+
#### Service Proxies
|
| 299 |
+
```python
|
| 300 |
+
# Direct access to individual services
|
| 301 |
+
POST /ner/analyze/text # NER analysis
|
| 302 |
+
POST /ocr/upload # OCR processing
|
| 303 |
+
POST /rag/search # RAG search
|
| 304 |
+
GET /rag/documents # List documents
|
| 305 |
+
```
|
| 306 |
+
|
| 307 |
+
## π Health Monitoring
|
| 308 |
+
|
| 309 |
+
### System Status
|
| 310 |
+
|
| 311 |
+
```bash
|
| 312 |
+
# Get overall system health
|
| 313 |
+
GET /health
|
| 314 |
+
|
| 315 |
+
# Get detailed status
|
| 316 |
+
GET /status
|
| 317 |
+
|
| 318 |
+
# Discover available services
|
| 319 |
+
GET /services
|
| 320 |
+
```
|
| 321 |
+
|
| 322 |
+
### Service Monitoring
|
| 323 |
+
|
| 324 |
+
Each service provides health information:
|
| 325 |
+
- Response times
|
| 326 |
+
- Uptime
|
| 327 |
+
- Resource usage
|
| 328 |
+
- Configuration status
|
| 329 |
+
- Error rates
|
| 330 |
+
|
| 331 |
+
## π οΈ Troubleshooting
|
| 332 |
+
|
| 333 |
+
### Common Issues
|
| 334 |
+
|
| 335 |
+
#### 1. Services Won't Start
|
| 336 |
+
|
| 337 |
+
**Check ports:**
|
| 338 |
+
```bash
|
| 339 |
+
netstat -an | grep :8000
|
| 340 |
+
netstat -an | grep :8500
|
| 341 |
+
netstat -an | grep :8400
|
| 342 |
+
netstat -an | grep :8401
|
| 343 |
+
```
|
| 344 |
+
|
| 345 |
+
**Verify configuration:**
|
| 346 |
+
```bash
|
| 347 |
+
python configs.py
|
| 348 |
+
```
|
| 349 |
+
|
| 350 |
+
**Check dependencies:**
|
| 351 |
+
```bash
|
| 352 |
+
pip list | grep fastapi
|
| 353 |
+
pip list | grep asyncpg
|
| 354 |
+
```
|
| 355 |
+
|
| 356 |
+
#### 2. Database Connection Issues
|
| 357 |
+
|
| 358 |
+
**Test connection:**
|
| 359 |
+
```bash
|
| 360 |
+
# Use your actual connection details
|
| 361 |
+
python -c "
|
| 362 |
+
import asyncio
|
| 363 |
+
import asyncpg
|
| 364 |
+
|
| 365 |
+
async def test():
|
| 366 |
+
conn = await asyncpg.connect('postgresql://user:pass@host:5432/db')
|
| 367 |
+
print('Connected successfully')
|
| 368 |
+
await conn.close()
|
| 369 |
+
|
| 370 |
+
asyncio.run(test())
|
| 371 |
+
"
|
| 372 |
+
```
|
| 373 |
+
|
| 374 |
+
**Common fixes:**
|
| 375 |
+
- Verify PostgreSQL is running
|
| 376 |
+
- Check firewall rules
|
| 377 |
+
- Confirm SSL requirements
|
| 378 |
+
- Validate credentials
|
| 379 |
+
|
| 380 |
+
#### 3. Azure Service Issues
|
| 381 |
+
|
| 382 |
+
**Check API keys:**
|
| 383 |
+
```bash
|
| 384 |
+
# Test Azure OpenAI
|
| 385 |
+
curl -H "api-key: YOUR_KEY" "YOUR_ENDPOINT/openai/deployments/YOUR_MODEL/embeddings?api-version=2024-02-01"
|
| 386 |
+
|
| 387 |
+
# Test Document Intelligence
|
| 388 |
+
curl -H "Ocp-Apim-Subscription-Key: YOUR_KEY" "YOUR_ENDPOINT/formrecognizer/info?api-version=2023-07-31"
|
| 389 |
+
```
|
| 390 |
+
|
| 391 |
+
**Common fixes:**
|
| 392 |
+
- Verify API keys are correct
|
| 393 |
+
- Check service regions
|
| 394 |
+
- Confirm quota limits
|
| 395 |
+
- Validate endpoint URLs
|
| 396 |
+
|
| 397 |
+
#### 4. Performance Issues
|
| 398 |
+
|
| 399 |
+
**Monitor resources:**
|
| 400 |
+
```bash
|
| 401 |
+
# Check system resources
|
| 402 |
+
top
|
| 403 |
+
htop
|
| 404 |
+
python manage_services.py status
|
| 405 |
+
```
|
| 406 |
+
|
| 407 |
+
**Common solutions:**
|
| 408 |
+
- Increase system memory
|
| 409 |
+
- Optimize database queries
|
| 410 |
+
- Reduce concurrent requests
|
| 411 |
+
- Check network latency
|
| 412 |
+
|
| 413 |
+
### Getting Help
|
| 414 |
+
|
| 415 |
+
1. **Check logs**: Services log to console
|
| 416 |
+
2. **Run health checks**: Use `/health` endpoints
|
| 417 |
+
3. **Validate configuration**: Run `python configs.py`
|
| 418 |
+
4. **Test individual services**: Use service manager
|
| 419 |
+
5. **Check database connectivity**: Test connection strings
|
| 420 |
+
6. **Verify Azure services**: Check API endpoints
|
| 421 |
+
|
| 422 |
+
### Debug Mode
|
| 423 |
+
|
| 424 |
+
Enable debug mode for detailed logging:
|
| 425 |
+
|
| 426 |
+
```bash
|
| 427 |
+
# In .env file
|
| 428 |
+
DEBUG=True
|
| 429 |
+
|
| 430 |
+
# Or set environment variable
|
| 431 |
+
export DEBUG=true
|
| 432 |
+
python app.py
|
| 433 |
+
```
|
| 434 |
+
|
| 435 |
+
## π Production Deployment
|
| 436 |
+
|
| 437 |
+
### Security Considerations
|
| 438 |
+
|
| 439 |
+
1. **Environment Variables**: Use secure secret management
|
| 440 |
+
2. **HTTPS**: Enable SSL/TLS in production
|
| 441 |
+
3. **Authentication**: Implement API authentication
|
| 442 |
+
4. **Rate Limiting**: Add request rate limiting
|
| 443 |
+
5. **Input Validation**: Validate all input data
|
| 444 |
+
|
| 445 |
+
### Performance Optimization
|
| 446 |
+
|
| 447 |
+
1. **Caching**: Implement Redis caching
|
| 448 |
+
2. **Load Balancing**: Use reverse proxy (nginx)
|
| 449 |
+
3. **Database**: Optimize PostgreSQL configuration
|
| 450 |
+
4. **Monitoring**: Set up application monitoring
|
| 451 |
+
5. **Scaling**: Consider horizontal scaling
|
| 452 |
+
|
| 453 |
+
### Deployment Options
|
| 454 |
+
|
| 455 |
+
1. **Docker**: Containerize services
|
| 456 |
+
2. **Cloud**: Deploy to Azure/AWS/GCP
|
| 457 |
+
3. **Kubernetes**: Orchestrate with k8s
|
| 458 |
+
4. **CI/CD**: Automate deployments
|
| 459 |
+
|
| 460 |
+
## π Next Steps
|
| 461 |
+
|
| 462 |
+
After successful setup:
|
| 463 |
+
|
| 464 |
+
1. **Explore the API**: Use the interactive documentation
|
| 465 |
+
2. **Try the demo**: Run `python demo.py`
|
| 466 |
+
3. **Run tests**: Execute `python test.py`
|
| 467 |
+
4. **Monitor system**: Check health endpoints
|
| 468 |
+
5. **Customize**: Modify services for your needs
|
| 469 |
+
6. **Scale**: Consider production deployment
|
| 470 |
+
|
| 471 |
+
## π― Success Indicators
|
| 472 |
+
|
| 473 |
+
You know the system is working when:
|
| 474 |
+
- β
All health checks pass
|
| 475 |
+
- β
Tests complete successfully
|
| 476 |
+
- β
Demo runs without errors
|
| 477 |
+
- β
API documentation is accessible
|
| 478 |
+
- β
Services respond to requests
|
| 479 |
+
- β
Database connections work
|
| 480 |
+
- β
Azure integrations function
|
| 481 |
+
- β
File uploads process correctly
|
| 482 |
+
- β
Search returns results
|
| 483 |
+
- β
Export files generate properly
|
| 484 |
+
|
| 485 |
+
**Congratulations! Your Unified AI Services system is ready to use! π**
|
manage_services.py
ADDED
|
@@ -0,0 +1,550 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Service Management Tool for Unified AI Services
|
| 4 |
+
Helps start, stop, monitor, and troubleshoot individual services
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import time
|
| 10 |
+
import signal
|
| 11 |
+
import subprocess
|
| 12 |
+
import asyncio
|
| 13 |
+
import json
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from typing import Dict, List, Optional, Tuple
|
| 16 |
+
import argparse
|
| 17 |
+
|
| 18 |
+
import httpx
|
| 19 |
+
import psutil
|
| 20 |
+
|
| 21 |
+
# Import configuration if available
|
| 22 |
+
try:
|
| 23 |
+
from configs import get_config, validate_environment
|
| 24 |
+
config = get_config()
|
| 25 |
+
except ImportError:
|
| 26 |
+
print("β οΈ Could not import configs. Using default values.")
|
| 27 |
+
config = None
|
| 28 |
+
|
| 29 |
+
class ServiceManager:
|
| 30 |
+
"""Manages individual services for development and troubleshooting"""
|
| 31 |
+
|
| 32 |
+
def __init__(self):
    """Set up the process registry and the static service catalogue."""
    # Popen handles for services this tool launched itself, keyed by name.
    self.processes: Dict[str, subprocess.Popen] = {}

    catalogue = [
        ("ner", "services/ner_service.py", 8500,
         "Named Entity Recognition with relationship extraction"),
        ("ocr", "services/ocr_service.py", 8400,
         "Optical Character Recognition with document processing"),
        ("rag", "services/rag_service.py", 8401,
         "Retrieval-Augmented Generation with vector search"),
        ("unified", "app.py", 8000,
         "Unified application coordinating all services"),
    ]
    self.service_configs = {
        name: {"script": script, "port": port, "description": description}
        for name, script, port, description in catalogue
    }

    # Ports from the central config module, when it imported, take
    # precedence over the hard-coded defaults above.
    if config:
        self.service_configs["ner"]["port"] = config.ner.PORT
        self.service_configs["ocr"]["port"] = config.ocr.PORT
        self.service_configs["rag"]["port"] = config.rag.PORT
        self.service_configs["unified"]["port"] = config.MAIN_PORT
|
| 63 |
+
|
| 64 |
+
def print_header(self, title: str):
    """Render *title* between two 60-character rule lines."""
    rule = "=" * 60
    print(f"\n{rule}\n  {title}\n{rule}")
|
| 69 |
+
|
| 70 |
+
def print_service_info(self, service_name: str):
    """Print a one-service summary; silently ignore unknown names."""
    service = self.service_configs.get(service_name)
    if service is None:
        return

    port = service["port"]
    print(f"🔍 {service_name.upper()} Service")
    print(f"   Description: {service['description']}")
    print(f"   Script: {service['script']}")
    print(f"   Port: {port}")
    print(f"   URL: http://localhost:{port}")
|
| 81 |
+
|
| 82 |
+
def is_port_in_use(self, port: int) -> bool:
    """Return True if any local connection is bound to *port*.

    Scans system-wide connections via psutil. Enumeration errors (e.g.
    insufficient permissions on some platforms) are treated as "port
    free" so callers can still attempt to start the service.
    """
    try:
        for conn in psutil.net_connections():
            # BUGFIX: laddr can be an empty tuple for some socket states;
            # guard before reading .port to avoid an AttributeError.
            if conn.laddr and conn.laddr.port == port:
                return True
        return False
    except (psutil.Error, OSError):
        # BUGFIX: was a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit.
        return False
|
| 91 |
+
|
| 92 |
+
async def check_service_health(self, service_name: str) -> Tuple[bool, Optional[Dict]]:
    """Probe the service's /health endpoint.

    Returns ``(ok, payload)``: *ok* is True only on HTTP 200; *payload*
    is the parsed JSON body on success, an ``{"error": ...}`` dict on
    failure, or ``None`` for an unknown service name.
    """
    if service_name not in self.service_configs:
        return False, None

    port = self.service_configs[service_name]["port"]
    url = f"http://localhost:{port}/health"

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(url, timeout=5.0)
            if response.status_code == 200:
                return True, response.json()
            return False, {"error": f"HTTP {response.status_code}"}
    except Exception as e:
        # Connection refused, timeout, bad JSON, etc. — report, don't raise.
        return False, {"error": str(e)}
|
| 111 |
+
|
| 112 |
+
def start_service(self, service_name: str) -> bool:
    """Launch one service as a child process.

    Returns True only when the process is still alive after a short
    startup grace period. Refuses to start when the script is missing,
    the port is occupied, or we already manage a live process for it.
    """
    if service_name not in self.service_configs:
        print(f"❌ Unknown service: {service_name}")
        return False

    service = self.service_configs[service_name]
    script_path = service["script"]
    port = service["port"]

    if not Path(script_path).exists():
        print(f"❌ Service script not found: {script_path}")
        return False

    if self.is_port_in_use(port):
        print(f"⚠️ Port {port} is already in use. Service may already be running.")
        return False

    existing = self.processes.get(service_name)
    if existing is not None and existing.poll() is None:
        print(f"⚠️ {service_name} service is already running (PID: {existing.pid})")
        return False

    try:
        print(f"🚀 Starting {service_name} service...")
        print(f"   Script: {script_path}")
        print(f"   Port: {port}")

        # Launch in its own process group so stop_service can later signal
        # the whole tree (CTRL_BREAK_EVENT on Windows, killpg elsewhere).
        if sys.platform == "win32":
            process = subprocess.Popen(
                [sys.executable, script_path],
                creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
            )
        else:
            process = subprocess.Popen(
                [sys.executable, script_path],
                preexec_fn=os.setsid,
            )

        self.processes[service_name] = process

        # Grace period: give the interpreter time to import and bind the port.
        time.sleep(2)

        if process.poll() is None:
            print(f"✅ {service_name} service started successfully (PID: {process.pid})")
            return True
        print(f"❌ {service_name} service failed to start")
        return False

    except Exception as e:
        print(f"❌ Failed to start {service_name} service: {e}")
        return False
|
| 170 |
+
|
| 171 |
+
def stop_service(self, service_name: str) -> bool:
    """Stop a service.

    Strategy: first try the child process we launched ourselves
    (graceful signal to its process group, then force-kill after 10s);
    otherwise scan for any external process holding the service's port
    and terminate that. Returns True when something was stopped.
    """
    if service_name not in self.service_configs:
        print(f"❌ Unknown service: {service_name}")
        return False

    port = self.service_configs[service_name]["port"]

    # 1) Prefer the process this tool started itself.
    if service_name in self.processes:
        process = self.processes[service_name]
        if process.poll() is None:  # still running
            try:
                print(f"🛑 Stopping {service_name} service (PID: {process.pid})...")

                # Signal the whole process group created in start_service.
                if sys.platform == "win32":
                    process.send_signal(signal.CTRL_BREAK_EVENT)
                else:
                    os.killpg(os.getpgid(process.pid), signal.SIGTERM)

                try:
                    process.wait(timeout=10)
                    print(f"✅ {service_name} service stopped")
                except subprocess.TimeoutExpired:
                    print(f"⚠️ Force killing {service_name} service...")
                    process.kill()
                del self.processes[service_name]
                return True

            except Exception as e:
                print(f"❌ Error stopping {service_name} service: {e}")
                return False

    # 2) Fall back to any external process listening on the port.
    try:
        for proc in psutil.process_iter(['pid', 'name', 'connections']):
            try:
                for conn in proc.info['connections'] or []:
                    # BUGFIX: laddr can be an empty tuple for some socket
                    # states; guard before reading .port.
                    if conn.laddr and conn.laddr.port == port:
                        print(f"🛑 Found process using port {port} (PID: {proc.pid})")
                        proc.terminate()
                        try:
                            proc.wait(timeout=5)
                            print(f"✅ Process {proc.pid} terminated")
                        except psutil.TimeoutExpired:
                            proc.kill()
                            print(f"✅ Process {proc.pid} killed")
                        return True
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # Process vanished or is off-limits — keep scanning.
                continue
    except Exception as e:
        print(f"❌ Error finding process on port {port}: {e}")

    print(f"⚠️ No running {service_name} service found")
    return False
|
| 230 |
+
|
| 231 |
+
def stop_all_services(self):
    """Attempt to stop every service in the catalogue, one by one."""
    print("🛑 Stopping all services...")
    for name in self.service_configs:
        self.stop_service(name)
|
| 237 |
+
|
| 238 |
+
async def get_service_status(self, service_name: str) -> Dict:
    """Collect port, process, and health information for one service.

    Returns a dict with static metadata plus: ``managed_process`` (we
    launched it and it is alive), resource stats when readable,
    ``port_in_use``, and the /health probe result.
    """
    if service_name not in self.service_configs:
        return {"status": "unknown", "error": "Unknown service"}

    service = self.service_configs[service_name]
    port = service["port"]

    status = {
        "name": service_name,
        "description": service["description"],
        "port": port,
        "script": service["script"],
        "managed_process": False,
        "port_in_use": self.is_port_in_use(port),
        "health_check": False,
        "health_data": None,
    }

    # Resource stats only for a live process that we started ourselves.
    process = self.processes.get(service_name)
    if process is not None and process.poll() is None:
        status["managed_process"] = True
        status["pid"] = process.pid
        try:
            proc = psutil.Process(process.pid)
            status["cpu_percent"] = proc.cpu_percent()
            status["memory_mb"] = proc.memory_info().rss / 1024 / 1024
            status["create_time"] = proc.create_time()
            status["uptime"] = time.time() - proc.create_time()
        except (psutil.Error, OSError):
            # BUGFIX: was a bare `except:` that also swallowed
            # KeyboardInterrupt/SystemExit; stats remain optional.
            pass

    health_ok, health_data = await self.check_service_health(service_name)
    status["health_check"] = health_ok
    status["health_data"] = health_data

    return status
|
| 278 |
+
|
| 279 |
+
async def status_all_services(self):
    """Print a human-readable status report for every known service."""
    self.print_header("Service Status Overview")

    for service_name in self.service_configs:
        status = await self.get_service_status(service_name)

        print(f"\n🔍 {service_name.upper()} Service")
        print(f"   Port: {status['port']}")
        print(f"   Script: {status['script']}")

        # Process section: managed child > external port holder > nothing.
        if status["managed_process"]:
            print(f"   ✅ Managed process running (PID: {status.get('pid', 'unknown')})")
            if 'uptime' in status:
                print(f"   ⏱️ Uptime: {status['uptime']:.0f} seconds")
            if 'cpu_percent' in status:
                print(f"   💻 CPU: {status['cpu_percent']:.1f}%")
            if 'memory_mb' in status:
                print(f"   🧠 Memory: {status['memory_mb']:.1f} MB")
        elif status["port_in_use"]:
            print(f"   ⚠️ Port in use (external process)")
        else:
            print(f"   ❌ Not running")

        # Health section.
        if status["health_check"]:
            print(f"   ✅ Health check: OK")
            health = status["health_data"]
            if isinstance(health, dict) and "status" in health:
                print(f"      Status: {health['status']}")
        else:
            print(f"   ❌ Health check: Failed")
            if status["health_data"] and "error" in status["health_data"]:
                print(f"      Error: {status['health_data']['error']}")
|
| 314 |
+
|
| 315 |
+
async def test_service(self, service_name: str):
    """Run the health gate, then dispatch to a service-specific smoke test."""
    if service_name not in self.service_configs:
        print(f"❌ Unknown service: {service_name}")
        return

    self.print_header(f"Testing {service_name.upper()} Service")
    status = await self.get_service_status(service_name)

    # Gate 1: something must be listening on the port.
    if not status["port_in_use"]:
        print("❌ Service is not running")
        return

    # Gate 2: the /health probe must have succeeded.
    if not status["health_check"]:
        print("❌ Health check failed")
        if status["health_data"]:
            print(f"   Error: {status['health_data']}")
        return

    print("✅ Service is running and healthy")

    smoke_tests = {
        "ner": self.test_ner_service,
        "ocr": self.test_ocr_service,
        "rag": self.test_rag_service,
        "unified": self.test_unified_service,
    }
    runner = smoke_tests.get(service_name)
    if runner is not None:
        await runner(status["port"])
|
| 349 |
+
|
| 350 |
+
async def test_ner_service(self, port: int):
    """Smoke-test the NER /analyze/text endpoint with a tiny sentence."""
    print("\n🧪 Testing NER functionality...")

    test_data = {
        "text": "John Smith works at Microsoft in Seattle.",
        "extract_relationships": True,
        "include_embeddings": False,
        "generate_graph_files": False,
    }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"http://localhost:{port}/analyze/text",
                json=test_data,
                timeout=30.0,
            )

            if response.status_code != 200:
                print(f"   ❌ NER test failed: HTTP {response.status_code}")
                return

            result = response.json()
            if not result.get("success"):
                print(f"   ❌ NER analysis failed: {result.get('error', 'Unknown error')}")
                return

            entities = result.get("entities", [])
            relationships = result.get("relationships", [])
            print(f"   ✅ NER analysis successful")
            print(f"   📊 Found {len(entities)} entities, {len(relationships)} relationships")

    except Exception as e:
        print(f"   ❌ NER test error: {e}")
|
| 383 |
+
|
| 384 |
+
async def test_ocr_service(self, port: int):
    """Smoke-test the OCR service; without a sample file we only ping /health."""
    print("\n🧪 Testing OCR functionality...")

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(f"http://localhost:{port}/health")

        if response.status_code == 200:
            print("   ✅ OCR service is responsive")
        else:
            print(f"   ❌ OCR test failed: HTTP {response.status_code}")

    except Exception as e:
        print(f"   ❌ OCR test error: {e}")
|
| 400 |
+
|
| 401 |
+
async def test_rag_service(self, port: int):
    """Smoke-test the RAG service by listing a handful of documents."""
    print("\n🧪 Testing RAG functionality...")

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(f"http://localhost:{port}/documents?limit=5")

        if response.status_code == 200:
            documents = response.json().get("documents", [])
            print(f"   ✅ RAG service is responsive")
            print(f"   📄 Found {len(documents)} documents in database")
        else:
            print(f"   ❌ RAG test failed: HTTP {response.status_code}")

    except Exception as e:
        print(f"   ❌ RAG test error: {e}")
|
| 420 |
+
|
| 421 |
+
async def test_unified_service(self, port: int):
    """Smoke-test the unified app via its service-discovery endpoint."""
    print("\n🧪 Testing Unified functionality...")

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(f"http://localhost:{port}/services")

        if response.status_code == 200:
            services = response.json().get("services", {})
            print(f"   ✅ Unified service is responsive")
            print(f"   🔍 Discovered {len(services)} services")
        else:
            print(f"   ❌ Unified test failed: HTTP {response.status_code}")

    except Exception as e:
        print(f"   ❌ Unified test error: {e}")
|
| 440 |
+
|
| 441 |
+
def list_services(self):
    """Print the catalogue of services this tool knows how to manage."""
    self.print_header("Available Services")

    for name, service in self.service_configs.items():
        port = service["port"]
        print(f"\n📋 {name}")
        print(f"   Description: {service['description']}")
        print(f"   Script: {service['script']}")
        print(f"   Port: {port}")
        print(f"   URL: http://localhost:{port}")
|
| 451 |
+
|
| 452 |
+
async def main():
    """Command-line entry point: parse arguments and dispatch to the manager."""
    parser = argparse.ArgumentParser(
        description="Service Management Tool for Unified AI Services",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python manage_services.py start ner     # Start NER service
  python manage_services.py stop all      # Stop all services
  python manage_services.py status        # Show status of all services
  python manage_services.py test rag      # Test RAG service
  python manage_services.py list          # List available services
        """,
    )
    parser.add_argument(
        "action",
        choices=["start", "stop", "restart", "status", "test", "list"],
        help="Action to perform",
    )
    parser.add_argument(
        "service",
        nargs="?",
        choices=["ner", "ocr", "rag", "unified", "all"],
        help="Service to act on (use 'all' for all services)",
    )
    args = parser.parse_args()

    manager = ServiceManager()

    # Actions that ignore the service argument.
    if args.action == "list":
        manager.list_services()
        return
    if args.action == "status":
        await manager.status_all_services()
        return

    if not args.service:
        print("❌ Service argument is required for this action")
        parser.print_help()
        return

    # Dependency order: the backends come up before the unified app
    # that fronts them.
    startup_order = ["ocr", "rag", "ner", "unified"]

    if args.action == "start":
        if args.service == "all":
            for service in startup_order:
                if manager.start_service(service):
                    time.sleep(3)  # stagger startups
                else:
                    print(f"⚠️ Failed to start {service}, continuing with other services...")
        else:
            manager.start_service(args.service)

    elif args.action == "stop":
        if args.service == "all":
            manager.stop_all_services()
        else:
            manager.stop_service(args.service)

    elif args.action == "restart":
        if args.service == "all":
            print("🔄 Restarting all services...")
            manager.stop_all_services()
            time.sleep(2)
            for service in startup_order:
                manager.start_service(service)
                time.sleep(3)
        else:
            print(f"🔄 Restarting {args.service} service...")
            manager.stop_service(args.service)
            time.sleep(2)
            manager.start_service(args.service)

    elif args.action == "test":
        if args.service == "all":
            for service_name in manager.service_configs:
                await manager.test_service(service_name)
                print()  # spacing between per-service reports
        else:
            await manager.test_service(args.service)
|
| 542 |
+
|
| 543 |
+
if __name__ == "__main__":
    # Run the async CLI; Ctrl-C is a clean exit, anything else is an error.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n👋 Operation cancelled by user")
    except Exception as exc:
        print(f"\n❌ Error: {exc}")
        sys.exit(1)
|
requirements.txt
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Unified AI Services - Python Dependencies
|
| 2 |
+
# Core framework dependencies
|
| 3 |
+
fastapi>=0.104.1
|
| 4 |
+
uvicorn[standard]>=0.24.0
|
| 5 |
+
pydantic>=2.5.0
|
| 6 |
+
python-multipart>=0.0.6
|
| 7 |
+
|
| 8 |
+
# HTTP client and async support
|
| 9 |
+
httpx>=0.25.0
|
| 10 |
+
aiofiles>=23.2.1
|
| 11 |
+
|
| 12 |
+
# Database dependencies
|
| 13 |
+
asyncpg>=0.29.0
|
| 14 |
+
psycopg2-binary>=2.9.7
|
| 15 |
+
|
| 16 |
+
# Azure services
|
| 17 |
+
azure-ai-inference>=1.0.0
|
| 18 |
+
azure-core>=1.29.0
|
| 19 |
+
azure-storage-blob>=12.19.0
|
| 20 |
+
azure-ai-documentintelligence>=1.0.0
|
| 21 |
+
|
| 22 |
+
# OpenAI integration
|
| 23 |
+
openai>=1.3.0
|
| 24 |
+
|
| 25 |
+
# Document processing
|
| 26 |
+
python-docx>=1.1.0
|
| 27 |
+
beautifulsoup4>=4.12.0
|
| 28 |
+
lxml>=4.9.0
|
| 29 |
+
Pillow>=10.0.0
|
| 30 |
+
|
| 31 |
+
# Utilities
|
| 32 |
+
requests>=2.31.0
|
| 33 |
+
numpy>=1.24.0
|
| 34 |
+
python-dotenv>=1.0.0
|
| 35 |
+
psutil>=5.9.0
|
| 36 |
+
|
| 37 |
+
# Development and testing (optional)
|
| 38 |
+
pytest>=7.4.0
|
| 39 |
+
pytest-asyncio>=0.21.0
|
| 40 |
+
black>=23.0.0
|
| 41 |
+
flake8>=6.0.0
|
| 42 |
+
|
| 43 |
+
# Additional data processing
|
| 44 |
+
pandas>=2.0.0
|
| 45 |
+
scikit-learn>=1.3.0
|
setup.py
ADDED
|
@@ -0,0 +1,511 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Automated Setup and Configuration for Unified AI Services
|
| 4 |
+
Helps set up the environment, validate configurations, and initialize services
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import json
|
| 10 |
+
import asyncio
|
| 11 |
+
import subprocess
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from typing import Dict, List, Optional
|
| 14 |
+
import shutil
|
| 15 |
+
|
| 16 |
+
def print_header(title: str):
    """Render *title* between two 60-character rule lines."""
    rule = "=" * 60
    print(f"\n{rule}\n  {title}\n{rule}")
|
| 21 |
+
|
| 22 |
+
def print_step(step: str):
    """Announce the next setup step with a wrench marker."""
    print(f"\n🔧 {step}")
|
| 25 |
+
|
| 26 |
+
def check_python_version():
    """Return True when the interpreter is Python 3.8+; print the verdict."""
    print_step("Checking Python version...")

    version = sys.version_info
    # Tuple comparison covers both the major and minor version checks.
    if version < (3, 8):
        print("❌ Python 3.8 or higher is required")
        print(f"   Current version: {version.major}.{version.minor}.{version.micro}")
        return False

    print(f"✅ Python {version.major}.{version.minor}.{version.micro} is compatible")
    return True
|
| 38 |
+
|
| 39 |
+
def create_directory_structure():
    """Create the working directories the services expect (idempotent)."""
    print_step("Creating directory structure...")

    for directory in ("services", "exports", "logs", "temp", "tests", "data"):
        path = Path(directory)
        if path.exists():
            print(f"   ✓ Directory exists: {directory}")
        else:
            path.mkdir(parents=True, exist_ok=True)
            print(f"   ✅ Created directory: {directory}")
|
| 59 |
+
|
| 60 |
+
def check_service_files():
    """Return True when every required service file exists; list the rest."""
    print_step("Checking service files...")

    required_files = {
        "services/ner_service.py": "NER Service",
        "services/ocr_service.py": "OCR Service",
        "services/rag_service.py": "RAG Service",
        "app.py": "Unified Application",
        "configs.py": "Configuration Management",
    }

    missing_files = []
    for file_path, description in required_files.items():
        if Path(file_path).exists():
            print(f"   ✅ {description}: {file_path}")
        else:
            print(f"   ❌ {description}: {file_path} (MISSING)")
            missing_files.append(file_path)

    if not missing_files:
        return True

    print(f"\n⚠️ Missing files detected:")
    for file_path in missing_files:
        print(f"   - {file_path}")
    print("\nPlease ensure all service files are in the correct locations.")
    return False
|
| 89 |
+
|
| 90 |
+
def create_env_file():
    """Create or update the .env file from interactive user input.

    Prompts the user for server, PostgreSQL, Azure OpenAI, DeepSeek,
    Azure Document Intelligence and Azure Storage settings, then writes
    them all to ``.env`` in a single pass (the original issued ~60
    separate ``f.write`` calls, which was fragile and hard to review).

    Returns:
        bool: True when the file was written (or an existing file was
        deliberately kept), False when writing failed.
    """
    print_step("Setting up environment configuration...")

    env_path = Path(".env")
    if env_path.exists():
        response = input("   .env file already exists. Overwrite? (y/N): ")
        if response.lower() != 'y':
            print("   Keeping existing .env file")
            return True

    print("\n📝 Please provide the following configuration values:")
    print("   (Press Enter to use default values shown in brackets)")

    def _ask(prompt: str, default: str = "") -> str:
        # Prompt once; fall back to the default on empty input.
        return input(prompt) or default

    cfg = {}

    print("\n🌐 Server Configuration:")
    cfg['HOST'] = _ask("   Host [0.0.0.0]: ", "0.0.0.0")
    cfg['DEBUG'] = _ask("   Debug mode (true/false) [True]: ", "True")
    cfg['MAIN_PORT'] = _ask("   Main app port [8000]: ", "8000")
    cfg['NER_PORT'] = _ask("   NER service port [8500]: ", "8500")
    cfg['OCR_PORT'] = _ask("   OCR service port [8400]: ", "8400")
    cfg['RAG_PORT'] = _ask("   RAG service port [8401]: ", "8401")

    print("\n🗄️ PostgreSQL Configuration:")
    cfg['POSTGRES_HOST'] = input("   PostgreSQL host: ")
    cfg['POSTGRES_PORT'] = _ask("   PostgreSQL port [5432]: ", "5432")
    cfg['POSTGRES_USER'] = input("   PostgreSQL user: ")
    cfg['POSTGRES_PASSWORD'] = input("   PostgreSQL password: ")
    cfg['POSTGRES_DATABASE'] = _ask("   PostgreSQL database [postgres]: ", "postgres")

    print("\n🤖 Azure OpenAI Configuration:")
    cfg['AZURE_OPENAI_ENDPOINT'] = input("   Azure OpenAI endpoint: ")
    cfg['AZURE_OPENAI_API_KEY'] = input("   Azure OpenAI API key: ")
    cfg['EMBEDDING_MODEL'] = _ask(
        "   Embedding model [text-embedding-3-large]: ", "text-embedding-3-large")

    print("\n🧠 DeepSeek Configuration:")
    cfg['DEEPSEEK_ENDPOINT'] = input("   DeepSeek endpoint: ")
    cfg['DEEPSEEK_API_KEY'] = input("   DeepSeek API key: ")
    cfg['DEEPSEEK_MODEL'] = _ask("   DeepSeek model [DeepSeek-R1-0528]: ", "DeepSeek-R1-0528")

    print("\n📄 Azure Document Intelligence Configuration:")
    cfg['AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT'] = input("   Document Intelligence endpoint: ")
    cfg['AZURE_DOCUMENT_INTELLIGENCE_KEY'] = input("   Document Intelligence API key: ")

    print("\n💾 Azure Storage Configuration:")
    cfg['AZURE_STORAGE_ACCOUNT_URL'] = input("   Storage account URL: ")
    cfg['AZURE_BLOB_SAS_TOKEN'] = input("   Blob SAS token: ")
    cfg['BLOB_CONTAINER'] = _ask("   Blob container [historylog]: ", "historylog")

    lines = [
        "# =================================================================",
        "# Unified AI Services - Environment Configuration",
        "# Generated by setup.py",
        "# =================================================================",
        "",
        "# Server Configuration",
        f"HOST={cfg['HOST']}",
        f"DEBUG={cfg['DEBUG']}",
        f"MAIN_PORT={cfg['MAIN_PORT']}",
        f"NER_PORT={cfg['NER_PORT']}",
        f"OCR_PORT={cfg['OCR_PORT']}",
        f"RAG_PORT={cfg['RAG_PORT']}",
        "",
        "# PostgreSQL Configuration",
        f"POSTGRES_HOST={cfg['POSTGRES_HOST']}",
        f"POSTGRES_PORT={cfg['POSTGRES_PORT']}",
        f"POSTGRES_USER={cfg['POSTGRES_USER']}",
        f"POSTGRES_PASSWORD={cfg['POSTGRES_PASSWORD']}",
        f"POSTGRES_DATABASE={cfg['POSTGRES_DATABASE']}",
        "",
        "# Azure OpenAI Configuration",
        f"AZURE_OPENAI_ENDPOINT={cfg['AZURE_OPENAI_ENDPOINT']}",
        f"AZURE_OPENAI_API_KEY={cfg['AZURE_OPENAI_API_KEY']}",
        f"EMBEDDING_MODEL={cfg['EMBEDDING_MODEL']}",
        # Deployment name intentionally mirrors the embedding model.
        f"AZURE_OPENAI_DEPLOYMENT_NAME={cfg['EMBEDDING_MODEL']}",
        "",
        "# DeepSeek Configuration",
        f"DEEPSEEK_ENDPOINT={cfg['DEEPSEEK_ENDPOINT']}",
        f"DEEPSEEK_API_KEY={cfg['DEEPSEEK_API_KEY']}",
        f"DEEPSEEK_MODEL={cfg['DEEPSEEK_MODEL']}",
        "",
        "# Azure Document Intelligence Configuration",
        f"AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT={cfg['AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT']}",
        f"AZURE_DOCUMENT_INTELLIGENCE_KEY={cfg['AZURE_DOCUMENT_INTELLIGENCE_KEY']}",
        "",
        "# Azure Storage Configuration",
        f"AZURE_STORAGE_ACCOUNT_URL={cfg['AZURE_STORAGE_ACCOUNT_URL']}",
        # SAS tokens contain '&' and '=' characters, so keep them quoted.
        f"AZURE_BLOB_SAS_TOKEN=\"{cfg['AZURE_BLOB_SAS_TOKEN']}\"",
        f"BLOB_CONTAINER={cfg['BLOB_CONTAINER']}",
        "",
        "# Processing Configuration",
        "MAX_FILE_SIZE=50",
        "REQUEST_TIMEOUT=300",
        "LOG_LEVEL=INFO",
        "ALLOWED_ORIGINS=*",
        "",
        "# RAG Specific Configuration",
        # NOTE(review): ${VAR} references rely on the .env loader supporting
        # POSIX-style interpolation (python-dotenv does) — confirm the RAG
        # service loads these through such a loader.
        "PG_HOST=${POSTGRES_HOST}",
        "PG_PORT=${POSTGRES_PORT}",
        "PG_DATABASE=vectorsearch",
        "PG_USER=${POSTGRES_USER}",
        "PG_PASSWORD=${POSTGRES_PASSWORD}",
        "PG_SSL_MODE=require",
        "CHUNK_SIZE=1000",
        "CHUNK_OVERLAP=200",
        "MIN_CHUNK_SIZE=50",
    ]

    try:
        with open(".env", "w") as f:
            f.write("\n".join(lines) + "\n")
        print("   ✅ .env file created successfully")
        return True
    except Exception as e:
        print(f"   ❌ Failed to create .env file: {e}")
        return False
def install_dependencies():
    """Write requirements.txt if absent, then pip-install it.

    Returns:
        bool: True when installation succeeds, False on any failure.
    """
    print_step("Installing Python dependencies...")

    requirements_file = Path("requirements.txt")
    if not requirements_file.exists():
        print("   Creating requirements.txt file...")

        packages = [
            "fastapi>=0.104.1",
            "uvicorn[standard]>=0.24.0",
            "httpx>=0.25.0",
            "asyncpg>=0.29.0",
            "psutil>=5.9.0",
            "pydantic>=2.5.0",
            "python-dotenv>=1.0.0",
            "python-multipart>=0.0.6",
            "azure-ai-inference>=1.0.0",
            "azure-core>=1.29.0",
            "azure-storage-blob>=12.19.0",
            "azure-ai-documentintelligence>=1.0.0",
            "openai>=1.3.0",
            "beautifulsoup4>=4.12.0",
            "requests>=2.31.0",
            "numpy>=1.24.0",
            "Pillow>=10.0.0",
            "python-docx>=1.1.0",
            "lxml>=4.9.0",
            "aiofiles>=23.2.1",
        ]

        try:
            with open("requirements.txt", "w") as f:
                f.write("\n".join(packages) + "\n")
            print("   ✅ requirements.txt created")
        except Exception as e:
            print(f"   ❌ Failed to create requirements.txt: {e}")
            return False

    try:
        print("   Installing dependencies (this may take a few minutes)...")
        # Use the current interpreter so the install targets the active env.
        proc = subprocess.run(
            [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
            capture_output=True,
            text=True,
        )
    except Exception as e:
        print(f"   ❌ Error installing dependencies: {e}")
        return False

    if proc.returncode == 0:
        print("   ✅ Dependencies installed successfully")
        return True

    print("   ❌ Failed to install dependencies:")
    print(f"   {proc.stderr}")
    return False
def validate_configuration():
    """Load the project configuration and run its validation checks.

    Returns:
        bool: True when validation passes, False on import or
        validation failure.
    """
    print_step("Validating configuration...")

    try:
        from configs import get_config, validate_environment

        cfg = get_config()
        cfg.print_configuration_summary()

        if not validate_environment():
            print("\n   ❌ Configuration validation failed!")
            print("   Please check your .env file and update missing values.")
            return False

        print("\n   ✅ Configuration validation passed!")
        return True

    except ImportError as e:
        print(f"   ❌ Failed to import configuration module: {e}")
        return False
    except Exception as e:
        print(f"   ❌ Configuration validation error: {e}")
        return False
async def test_database_connection():
    """Open a short-lived PostgreSQL connection and run SELECT version().

    Uses the NER service's connection settings from the project config.
    Fix over the original: the connection is now closed in a ``finally``
    block, so it is not leaked when the test query raises.

    Returns:
        bool: True when the connection and query succeed, False otherwise.
    """
    print_step("Testing database connection...")

    try:
        import asyncpg
        from configs import get_config

        ner_cfg = get_config().ner

        conn = await asyncpg.connect(
            host=ner_cfg.POSTGRES_HOST,
            port=ner_cfg.POSTGRES_PORT,
            database=ner_cfg.POSTGRES_DATABASE,
            user=ner_cfg.POSTGRES_USER,
            password=ner_cfg.POSTGRES_PASSWORD,
            ssl='require',
            timeout=10,
        )
        try:
            # A trivial query that proves the session is usable.
            version = await conn.fetchval("SELECT version()")
        finally:
            # Always release the connection, even if the query fails.
            await conn.close()

        print("   ✅ Database connection successful")
        print(f"   Database version: {version[:50]}...")
        return True

    except Exception as e:
        print(f"   ❌ Database connection failed: {e}")
        print("\n   💡 Troubleshooting tips:")
        print("   1. Check your PostgreSQL server is running")
        print("   2. Verify host, port, username, and password")
        print("   3. Ensure your IP is allowlisted in firewall rules")
        print("   4. Check SSL configuration")
        return False
def create_startup_script():
    """Write a platform-appropriate launcher script for the application.

    Produces ``start_services.bat`` on Windows or an executable
    ``start_services.sh`` elsewhere.

    Returns:
        bool: Always True.
    """
    print_step("Creating startup script...")

    if sys.platform == "win32":
        script_name = "start_services.bat"
        script_body = (
            "@echo off\n"
            "echo Starting Unified AI Services...\n"
            "echo.\n"
            "\n"
            "echo Starting in 3 seconds...\n"
            "timeout /t 3 /nobreak >nul\n"
            "\n"
            "echo Starting unified application...\n"
            "python app.py\n"
            "\n"
            "pause\n"
        )
    else:
        script_name = "start_services.sh"
        script_body = (
            "#!/bin/bash\n"
            "\n"
            "echo \"Starting Unified AI Services...\"\n"
            "echo\n"
            "\n"
            "echo \"Starting in 3 seconds...\"\n"
            "sleep 3\n"
            "\n"
            "echo \"Starting unified application...\"\n"
            "python app.py\n"
        )

    with open(script_name, "w") as f:
        f.write(script_body)

    if script_name.endswith(".sh"):
        # Shell scripts need the executable bit set.
        os.chmod(script_name, 0o755)

    print(f"   ✅ Created {script_name}")
    return True
def create_test_script():
    """Write a platform-appropriate test-runner script.

    Produces ``run_tests.bat`` on Windows or an executable
    ``run_tests.sh`` elsewhere; both simply invoke ``test_unified.py``.

    Returns:
        bool: Always True.
    """
    print_step("Creating test script...")

    if sys.platform == "win32":
        script_name = "run_tests.bat"
        script_body = (
            "@echo off\n"
            "echo Running Unified System Tests...\n"
            "echo.\n"
            "\n"
            "echo Make sure the unified application is running first!\n"
            "echo Press any key to continue or Ctrl+C to cancel...\n"
            "pause >nul\n"
            "\n"
            "echo Running comprehensive tests...\n"
            "python test_unified.py\n"
            "\n"
            "pause\n"
        )
    else:
        script_name = "run_tests.sh"
        script_body = (
            "#!/bin/bash\n"
            "\n"
            "echo \"Running Unified System Tests...\"\n"
            "echo\n"
            "\n"
            "echo \"Make sure the unified application is running first!\"\n"
            "read -p \"Press Enter to continue or Ctrl+C to cancel...\"\n"
            "\n"
            "echo \"Running comprehensive tests...\"\n"
            "python test_unified.py\n"
        )

    with open(script_name, "w") as f:
        f.write(script_body)

    if script_name.endswith(".sh"):
        # Shell scripts need the executable bit set.
        os.chmod(script_name, 0o755)

    print(f"   ✅ Created {script_name}")
    return True
def main():
    """Run the interactive setup: environment checks, config, deps, scripts."""
    print_header("Unified AI Services - Automated Setup")

    print("This script will help you set up the Unified AI Services application.")
    print("It will:")
    for task in (
        "Check your Python environment",
        "Create necessary directories",
        "Check for required service files",
        "Set up configuration (.env file)",
        "Install Python dependencies",
        "Validate configuration",
        "Test database connection",
        "Create startup and test scripts",
    ):
        print(f"   • {task}")

    if input("\nProceed with setup? (Y/n): ").lower() == 'n':
        print("Setup cancelled.")
        return

    setup_steps = (
        ("Python Version Check", check_python_version),
        ("Directory Structure", create_directory_structure),
        ("Service Files Check", check_service_files),
        ("Environment Configuration", create_env_file),
        ("Dependencies Installation", install_dependencies),
        ("Configuration Validation", validate_configuration),
        ("Startup Scripts", create_startup_script),
        ("Test Scripts", create_test_script),
    )

    failed_steps = []
    for label, action in setup_steps:
        try:
            ok = bool(action())
        except Exception as e:
            print(f"   ❌ {label} failed with exception: {e}")
            ok = False
        if not ok:
            failed_steps.append(label)

    # The database check is best-effort and never blocks setup.
    print_step("Testing database connection (optional)...")
    try:
        asyncio.run(test_database_connection())
    except Exception as e:
        print(f"   ⚠️ Database test skipped: {e}")

    print_header("Setup Summary")

    if failed_steps:
        print("⚠️ Setup completed with some issues:")
        for label in failed_steps:
            print(f"   ❌ {label}")
        print("\nPlease resolve the failed steps before proceeding.")
        print("You may need to:")
        print("• Check your internet connection for dependency installation")
        print("• Verify your Azure service credentials")
        print("• Ensure PostgreSQL is accessible")
        print("• Check file permissions")
        return

    on_windows = sys.platform == "win32"
    print("🎉 Setup completed successfully!")
    print("\nNext steps:")
    print("1. Review the .env file and update any missing values")
    print("2. Start the unified application:")
    if on_windows:
        print("   • Double-click start_services.bat")
    else:
        print("   • Run: ./start_services.sh")
    print("   • Or run: python app.py")
    print("3. Test the system:")
    if on_windows:
        print("   • Double-click run_tests.bat")
    else:
        print("   • Run: ./run_tests.sh")
    print("   • Or run: python test_unified.py")
    print("4. Access the API documentation at: http://localhost:8000/docs")


if __name__ == "__main__":
    main()
|
test.py
ADDED
|
@@ -0,0 +1,1055 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Comprehensive Test Suite for Unified AI Services
|
| 4 |
+
Tests the unified application and all integrated services (NER, OCR, RAG)
|
| 5 |
+
Combines functionality from test_rag.py and test_ner.py with new unified tests
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import asyncio
|
| 9 |
+
import httpx
|
| 10 |
+
import json
|
| 11 |
+
import io
|
| 12 |
+
import sys
|
| 13 |
+
import time
|
| 14 |
+
import tempfile
|
| 15 |
+
import os
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from typing import Dict, List, Any, Optional, Tuple
|
| 18 |
+
import uuid as python_uuid
|
| 19 |
+
|
| 20 |
+
# Import configuration
|
| 21 |
+
try:
|
| 22 |
+
from configs import get_config, validate_environment
|
| 23 |
+
config = get_config()
|
| 24 |
+
except ImportError:
|
| 25 |
+
print("β οΈ Could not import configs. Using default values.")
|
| 26 |
+
config = None
|
| 27 |
+
|
| 28 |
+
# Test configuration
|
| 29 |
+
UNIFIED_URL = "http://localhost:8000" # Main unified app
|
| 30 |
+
NER_URL = "http://localhost:8500" # Direct NER service
|
| 31 |
+
OCR_URL = "http://localhost:8400" # Direct OCR service
|
| 32 |
+
RAG_URL = "http://localhost:8401" # Direct RAG service
|
| 33 |
+
TEST_TIMEOUT = 300
|
| 34 |
+
|
| 35 |
+
# Test data (from original test files)
|
| 36 |
+
THAI_CYANIDE_MURDER_CASE = """
|
| 37 |
+
ΰΉΰΈ«ΰΈΰΈΈΰΈΰΈ²ΰΈΰΈΰΈ£ΰΈ£ΰΈ‘ΰΈΰΉΰΈ§ΰΈ’ΰΉΰΈΰΈ’ΰΈ²ΰΉΰΈΰΈΰΉ ΰΈ.ΰΈ¨. 2566
|
| 38 |
+
|
| 39 |
+
ΰΈΰΈΰΈ΅ΰΈΰΈ²ΰΈΰΈΰΈ£ΰΈ£ΰΈ‘ΰΈΰΉΰΈΰΉΰΈΰΈ·ΰΉΰΈΰΈΰΈΰΈ΅ΰΉΰΈͺΰΈ±ΰΉΰΈΰΈͺΰΈ°ΰΉΰΈΰΈ·ΰΈΰΈΰΈͺΰΈ±ΰΈΰΈΰΈ‘ΰΉΰΈΰΈ’ ΰΉΰΈΰΈ΄ΰΈΰΈΰΈΆΰΉΰΈΰΈ£ΰΈ°ΰΈ«ΰΈ§ΰΉΰΈ²ΰΈΰΉΰΈΰΈ·ΰΈΰΈΰΉΰΈ‘ΰΈ©ΰΈ²ΰΈ’ΰΈ-ΰΈΰΈΈΰΈ₯ΰΈ²ΰΈΰΈ‘ ΰΈ.ΰΈ¨. 2566
|
| 40 |
+
ΰΉΰΈΰΈ’ΰΈ‘ΰΈ΅ΰΈΰΈ²ΰΈΰΈͺΰΈ²ΰΈ§ΰΈͺΰΈ²ΰΈ£ΰΈ΄ΰΈΰΈ΅ ΰΈΰΈ±ΰΈ’ΰΈ§ΰΈ±ΰΈΰΈΰΉ ΰΈ«ΰΈ£ΰΈ·ΰΈ "ΰΉΰΈΰΈ‘ ΰΉΰΈΰΈ’ΰΈ²ΰΉΰΈΰΈΰΉ" ΰΈΰΈ²ΰΈ’ΰΈΈ 36 ΰΈΰΈ΅ ΰΉΰΈΰΉΰΈΰΈΰΈΉΰΉΰΈΰΉΰΈΰΈΰΈ«ΰΈ²
|
| 41 |
+
|
| 42 |
+
ΰΈ£ΰΈ²ΰΈ’ΰΈ₯ΰΈ°ΰΉΰΈΰΈ΅ΰΈ’ΰΈΰΈΰΈΰΈ΅:
|
| 43 |
+
ΰΈΰΈΉΰΉΰΈΰΉΰΈΰΈΰΈ«ΰΈ²ΰΉΰΈΰΉΰΈΰΈ³ΰΈΰΈ²ΰΈ£ΰΈ§ΰΈ²ΰΈΰΈ’ΰΈ²ΰΈΰΈ΄ΰΈ©ΰΉΰΈΰΈ’ΰΈ²ΰΉΰΈΰΈΰΉ (Potassium Cyanide) ΰΉΰΈΰΈΰΈ²ΰΈ«ΰΈ²ΰΈ£ΰΉΰΈ₯ΰΈ°ΰΉΰΈΰΈ£ΰΈ·ΰΉΰΈΰΈΰΈΰΈ·ΰΉΰΈ‘ΰΈΰΈΰΈΰΉΰΈ«ΰΈ’ΰΈ·ΰΉΰΈΰΈ«ΰΈ₯ΰΈ²ΰΈ’ΰΈ£ΰΈ²ΰΈ’
|
| 44 |
+
ΰΉΰΈ«ΰΈ’ΰΈ·ΰΉΰΈΰΈ£ΰΈ²ΰΈ’ΰΉΰΈ£ΰΈΰΈΰΈ·ΰΈ ΰΈΰΈ²ΰΈΰΈͺΰΈ΄ΰΈ£ΰΈ΄ΰΈΰΈ£ ΰΈΰΈΈΰΈΰΈ₯ΰΈ²ΰΈ ΰΈ§ΰΈΰΈ΄ΰΈ ΰΈΰΈ²ΰΈ’ΰΈΈ 32 ΰΈΰΈ΅ ΰΉΰΈͺΰΈ΅ΰΈ’ΰΈΰΈ΅ΰΈ§ΰΈ΄ΰΈΰΉΰΈ‘ΰΈ·ΰΉΰΈΰΈ§ΰΈ±ΰΈΰΈΰΈ΅ΰΉ 14 ΰΉΰΈ‘ΰΈ©ΰΈ²ΰΈ’ΰΈ 2566 ΰΈΰΈ΅ΰΉΰΈΰΈ±ΰΈΰΈ«ΰΈ§ΰΈ±ΰΈΰΈΰΈ²ΰΈΰΈΰΈΰΈΰΈΈΰΈ£ΰΈ΅
|
| 45 |
+
ΰΉΰΈ«ΰΈ’ΰΈ·ΰΉΰΈΰΈ£ΰΈ²ΰΈ’ΰΈΰΈ΅ΰΉΰΈͺΰΈΰΈ ΰΈΰΈ²ΰΈ’ΰΈͺΰΈΈΰΈ£ΰΈΰΈ±ΰΈ’ ΰΈΰΈ’ΰΈΉΰΉΰΈΰΈΰΈΰΈ₯ΰΈ±ΰΈ ΰΈΰΈ²ΰΈ’ΰΈΈ 45 ΰΈΰΈ΅ ΰΉΰΈͺΰΈ΅ΰΈ’ΰΈΰΈ΅ΰΈ§ΰΈ΄ΰΈΰΉΰΈ‘ΰΈ·ΰΉΰΈΰΈ§ΰΈ±ΰΈΰΈΰΈ΅ΰΉ 2 ΰΈΰΈ€ΰΈ©ΰΈ ΰΈ²ΰΈΰΈ‘ 2566 ΰΈΰΈ΅ΰΉΰΈΰΈ±ΰΈΰΈ«ΰΈ§ΰΈ±ΰΈΰΈ£ΰΈ²ΰΈΰΈΰΈΈΰΈ£ΰΈ΅
|
| 46 |
+
|
| 47 |
+
ΰΈΰΈ²ΰΈ£ΰΈͺΰΈ·ΰΈΰΈͺΰΈ§ΰΈ:
|
| 48 |
+
ΰΈΰΈ³ΰΈ£ΰΈ§ΰΈΰΈ ΰΈΉΰΈΰΈ£ΰΈ ΰΈ²ΰΈ 7 ΰΈ£ΰΉΰΈ§ΰΈ‘ΰΈΰΈ±ΰΈ ΰΈͺΰΈ³ΰΈΰΈ±ΰΈΰΈΰΈ²ΰΈΰΈΰΈ³ΰΈ£ΰΈ§ΰΈΰΉΰΈ«ΰΉΰΈΰΈΰΈ²ΰΈΰΈ΄ ΰΈΰΈ³ΰΈΰΈ²ΰΈ£ΰΈͺΰΈ·ΰΈΰΈͺΰΈ§ΰΈ
|
| 49 |
+
ΰΈΰΈΰΈ«ΰΈ₯ΰΈ±ΰΈΰΈΰΈ²ΰΈΰΈΰΈ²ΰΈΰΈΰΈ₯ΰΉΰΈΰΈΰΈ§ΰΈΰΈΰΈ£ΰΈΰΈ΄ΰΈ (CCTV) ΰΉΰΈΰΈ«ΰΈ₯ΰΈ²ΰΈ’ΰΈΰΈ·ΰΉΰΈΰΈΰΈ΅ΰΉ
|
| 50 |
+
ΰΈΰΈ£ΰΈ§ΰΈΰΈΰΈΰΈͺΰΈ²ΰΈ£ΰΉΰΈΰΈ’ΰΈ²ΰΉΰΈΰΈΰΉΰΉΰΈΰΈ£ΰΉΰΈ²ΰΈΰΈΰΈ²ΰΈ’ΰΉΰΈ«ΰΈ’ΰΈ·ΰΉΰΈΰΈΰΈΈΰΈΰΈ£ΰΈ²ΰΈ’
|
| 51 |
+
|
| 52 |
+
ΰΈΰΈ²ΰΈ£ΰΈΰΈ±ΰΈΰΈΰΈΈΰΈ‘:
|
| 53 |
+
ΰΈ§ΰΈ±ΰΈΰΈΰΈ΅ΰΉ 3 ΰΈΰΈΈΰΈ₯ΰΈ²ΰΈΰΈ‘ 2566 ΰΈΰΈ³ΰΈ£ΰΈ§ΰΈΰΈΰΈ±ΰΈΰΈΰΈΈΰΈ‘ΰΈΰΈ±ΰΈ§ΰΈΰΈΉΰΉΰΈΰΉΰΈΰΈΰΈ«ΰΈ²ΰΉΰΈΰΉΰΈΰΈ΅ΰΉΰΉΰΈ£ΰΈΰΉΰΈ£ΰΈ‘ΰΉΰΈΰΈΰΈ° ΰΈΰΈ²ΰΈ’ΰΉΰΈΰΈ ΰΈΰΈ±ΰΉΰΈΰΈΰΈ’ΰΈΉΰΉΰΈΰΈ΅ΰΉ ΰΈΰΈΰΈΰΈ£ΰΈ²ΰΈ‘ΰΈΰΈ³ΰΉΰΈ«ΰΈ ΰΈΰΈ£ΰΈΈΰΈΰΉΰΈΰΈΰΈ‘ΰΈ«ΰΈ²ΰΈΰΈΰΈ£
|
| 54 |
+
ΰΈΰΈΰΉΰΈΰΈΰΈͺΰΈ²ΰΈ£ΰΈΰΈ₯ΰΈΰΈ‘ ΰΈΰΈ±ΰΈΰΈ£ΰΈΰΈ£ΰΈ°ΰΈΰΈ²ΰΈΰΈΰΈΰΈ₯ΰΈΰΈ‘ ΰΉΰΈ₯ΰΈ°ΰΈ§ΰΈ±ΰΈΰΈΰΈΈΰΈΰΈ’ΰΈ²ΰΈΰΈͺΰΈ³ΰΈΰΈ±ΰΈΰΈΰΈ·ΰΉΰΈΰΉ
|
| 55 |
+
ΰΈ’ΰΈΆΰΈΰΈΰΈ£ΰΈ±ΰΈΰΈ’ΰΉΰΈͺΰΈ΄ΰΈΰΈΰΈ΅ΰΉΰΉΰΈΰΉΰΈΰΈ²ΰΈΰΈΰΈ²ΰΈ£ΰΈΰΈ£ΰΈ°ΰΈΰΈ³ΰΈΰΈ΄ΰΈ ΰΈ‘ΰΈΉΰΈ₯ΰΈΰΉΰΈ²ΰΈ£ΰΈ§ΰΈ‘ΰΈΰΈ§ΰΉΰΈ² 2 ΰΈ₯ΰΉΰΈ²ΰΈΰΈΰΈ²ΰΈ
|
| 56 |
+
"""
|
| 57 |
+
|
| 58 |
+
ENGLISH_CYBERSECURITY_CASE = """
|
| 59 |
+
Major Cybersecurity Incident Report - Operation Digital Shield
|
| 60 |
+
|
| 61 |
+
Incident Overview:
|
| 62 |
+
On October 15, 2024, CyberDefense Corp, a leading cybersecurity firm headquartered in Austin, Texas, detected a sophisticated Advanced Persistent Threat (APT) targeting critical infrastructure across Southeast Asia.
|
| 63 |
+
|
| 64 |
+
Key Personnel:
|
| 65 |
+
- Dr. Sarah Chen, Chief Security Officer at CyberDefense Corp
|
| 66 |
+
- Agent Michael Rodriguez, FBI Cyber Division
|
| 67 |
+
- Captain Lisa Thompson, US Cyber Command
|
| 68 |
+
|
| 69 |
+
Technical Details:
|
| 70 |
+
The attackers used a custom malware strain called "DeepStrike" developed by the Shadow Dragon group
|
| 71 |
+
Primary attack vector: spear-phishing emails containing weaponized PDF documents
|
| 72 |
+
Estimated financial damage: $50 million USD across affected organizations
|
| 73 |
+
"""
|
| 74 |
+
|
| 75 |
+
TEST_URLS = [
|
| 76 |
+
"https://httpbin.org/html",
|
| 77 |
+
"https://httpbin.org/json"
|
| 78 |
+
]
|
| 79 |
+
|
| 80 |
+
class TestResult:
    """Accumulate pass/fail/warning outcomes across a test run.

    Fixes over the original:
    - ``details`` parameter annotated ``Optional[Dict]`` (it defaults to
      ``None``, so a bare ``Dict`` annotation was wrong).
    - ``print_summary`` printed a bare ``"0%"`` with no label when no
      tests had run; it now always prints a labelled success rate.
    """

    def __init__(self):
        # Counters feeding the final summary.
        self.total_tests = 0
        self.passed_tests = 0
        self.failed_tests = 0
        # Per-test records: {'test_name', 'passed', 'message', 'details'}.
        self.test_results = []
        # Non-fatal issues; do not affect pass/fail counts.
        self.warnings = []

    def add_result(self, test_name: str, passed: bool, message: str = "",
                   details: Optional[Dict] = None):
        """Record one test outcome and echo it to stdout."""
        self.total_tests += 1
        if passed:
            self.passed_tests += 1
            print(f"✅ {test_name}")
            if message:
                print(f"   {message}")
        else:
            self.failed_tests += 1
            print(f"❌ {test_name}: {message}")

        self.test_results.append({
            'test_name': test_name,
            'passed': passed,
            'message': message,
            'details': details or {}
        })

    def add_warning(self, test_name: str, message: str):
        """Add a warning (doesn't count as pass/fail)."""
        print(f"⚠️ {test_name}: {message}")
        self.warnings.append({
            'test_name': test_name,
            'message': message
        })

    def print_summary(self):
        """Print the aggregate test summary, failures, and warnings."""
        print("\n" + "=" * 60)
        print("UNIFIED SYSTEM TEST SUMMARY")
        print("=" * 60)
        print(f"Total Tests: {self.total_tests}")
        print(f"Passed: {self.passed_tests}")
        print(f"Failed: {self.failed_tests}")
        print(f"Warnings: {len(self.warnings)}")
        # Guard against division by zero when no tests were recorded.
        rate = (self.passed_tests / self.total_tests * 100) if self.total_tests else 0.0
        print(f"Success Rate: {rate:.1f}%")

        if self.failed_tests > 0:
            print("\n❌ FAILED TESTS:")
            for result in self.test_results:
                if not result['passed']:
                    print(f"   - {result['test_name']}: {result['message']}")

        if self.warnings:
            print("\n⚠️ WARNINGS:")
            for warning in self.warnings:
                print(f"   - {warning['test_name']}: {warning['message']}")
|
| 138 |
+
class UnifiedSystemTester:
    """Async context manager that drives the end-to-end unified-system tests."""

    def __init__(self):
        self.result = TestResult()
        self.session = None                # httpx.AsyncClient, created in __aenter__
        self.created_documents = []        # RAG document ids created during tests (cleanup)
        self.created_analyses = []         # NER analysis ids created during tests (cleanup)

    async def __aenter__(self):
        self.session = httpx.AsyncClient(timeout=TEST_TIMEOUT)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.aclose()

    async def make_request(self, method: str, url: str, **kwargs) -> httpx.Response:
        """Issue an HTTP request, converting transport errors into plain Exceptions."""
        try:
            return await self.session.request(method, url, **kwargs)
        except httpx.RequestError as e:
            raise Exception(f"Request failed: {e}")
+
async def test_unified_app_health(self):
    """Test 1: the unified application's aggregate /health endpoint."""
    print("🔍 Test 1: Unified Application Health Check")
    try:
        response = await self.make_request('GET', f"{UNIFIED_URL}/health")

        if response.status_code == 200:
            data = response.json()
            status = data.get("status")
            services = data.get("services", [])

            healthy_services = [s for s in services if s.get("health")]
            total_services = len(services)

            # "degraded" is still acceptable as long as something is healthy.
            if status in ["healthy", "degraded"] and healthy_services:
                message = f"Status: {status}, Services: {len(healthy_services)}/{total_services} healthy"
                for service in services:
                    service_status = "✅" if service.get("health") else "❌"
                    message += f"\n   {service_status} {service.get('name')}: {service.get('status')} ({service.get('response_time', 0):.3f}s)"

                self.result.add_result("Unified App Health Check", True, message, data)
                return True
            else:
                self.result.add_result("Unified App Health Check", False, f"System unhealthy: {data}")
                return False
        else:
            self.result.add_result("Unified App Health Check", False,
                                   f"HTTP {response.status_code}: {response.text}")
            return False
    except Exception as e:
        # Connection failures get extra diagnostics to help the operator.
        if "connection" in str(e).lower():
            print("\n🔍 Connection Diagnostics:")
            print(f"   Unified App URL: {UNIFIED_URL}")
            print(f"   Error: {e}")
            print("\n💡 Possible Issues:")
            print("   1. Unified app is not running")
            print("   2. Wrong host/port in configuration")
            print("   3. Services failed to start")
            print("\n🚀 To Start Unified App:")
            print("   python app.py")

        self.result.add_result("Unified App Health Check", False, str(e))
        return False
+
async def test_individual_service_health(self):
    """Test 2: hit each backing service's /health endpoint directly."""
    print("🔍 Test 2: Individual Service Health Checks")

    services = [
        ("NER", NER_URL),
        ("OCR", OCR_URL),
        ("RAG", RAG_URL)
    ]

    all_healthy = True
    service_statuses = {}

    for service_name, service_url in services:
        try:
            response = await self.make_request('GET', f"{service_url}/health")
            if response.status_code == 200:
                data = response.json()
                status = data.get("status", "unknown")
                service_statuses[service_name] = {"healthy": True, "status": status, "details": data}
                print(f"   ✅ {service_name}: {status}")
            else:
                service_statuses[service_name] = {
                    "healthy": False,
                    "status": f"HTTP {response.status_code}",
                    "details": None
                }
                print(f"   ❌ {service_name}: HTTP {response.status_code}")
                all_healthy = False
        except Exception as e:
            service_statuses[service_name] = {
                "healthy": False,
                "status": f"Error: {e}",
                "details": None
            }
            print(f"   ❌ {service_name}: {e}")
            all_healthy = False

    healthy_count = sum(1 for s in service_statuses.values() if s['healthy'])
    self.result.add_result(
        "Individual Service Health",
        all_healthy,
        f"Services healthy: {healthy_count}/{len(services)}",
        service_statuses
    )

    return all_healthy
+
async def test_unified_analysis_text(self):
    """Test 3: full unified analysis (NER + RAG indexing) on raw text."""
    print("🔍 Test 3: Unified Analysis with Text")

    try:
        request_data = {
            "text": THAI_CYANIDE_MURDER_CASE,
            "extract_relationships": True,
            "include_embeddings": False,
            "include_summary": True,
            "generate_graph_files": True,
            "export_formats": ["neo4j", "json"],
            "enable_rag_indexing": True,
            "rag_title": "Cyanide Murder Case Analysis",
            "rag_keywords": ["cyanide", "murder", "investigation", "thai"],
            "rag_metadata": {"test": True, "case_type": "criminal"}
        }

        response = await self.make_request('POST', f"{UNIFIED_URL}/analyze/unified", json=request_data)

        if response.status_code == 200:
            data = response.json()
            if data.get("success"):
                service_calls = data.get("service_calls", [])
                ner_analysis = data.get("ner_analysis", {})
                rag_document = data.get("rag_document", {})
                processing_time = data.get("processing_time", 0)

                entities = ner_analysis.get("entities", [])
                relationships = ner_analysis.get("relationships", [])

                # Remember created artifacts so cleanup can delete them.
                if ner_analysis.get("analysis_id"):
                    self.created_analyses.append(ner_analysis["analysis_id"])
                if rag_document and rag_document.get("document_id"):
                    self.created_documents.append(rag_document["document_id"])

                message = f"Service calls: {', '.join(service_calls)}"
                message += f"\n   Processing time: {processing_time:.2f}s"
                message += f"\n   NER entities: {len(entities)}"
                message += f"\n   NER relationships: {len(relationships)}"
                if rag_document:
                    message += f"\n   RAG document ID: {rag_document.get('document_id', 'N/A')}"
                    message += f"\n   RAG chunks: {rag_document.get('total_chunks', 0)}"

                # Expect an NER call, plus a RAG upload when indexing was requested.
                expected_calls = ["ner_text"]
                if "enable_rag_indexing" in request_data and request_data["enable_rag_indexing"]:
                    expected_calls.append("rag_upload")

                all_expected_calls = all(call in service_calls for call in expected_calls)

                self.result.add_result(
                    "Unified Analysis (Text)",
                    all_expected_calls and entities and len(service_calls) > 0,
                    message,
                    data
                )
                return data
            else:
                self.result.add_result("Unified Analysis (Text)", False,
                                       data.get("error", "Analysis failed"))
                return None
        else:
            self.result.add_result("Unified Analysis (Text)", False,
                                   f"HTTP {response.status_code}: {response.text[:200]}")
            return None
    except Exception as e:
        self.result.add_result("Unified Analysis (Text)", False, str(e))
        return None
+
async def test_unified_analysis_url(self):
    """Test 4: unified analysis driven by a URL instead of raw text.

    Success requires that the pipeline reported an "ner_url" service call.
    """
    print("🔍 Test 4: Unified Analysis with URL")

    try:
        request_data = {
            "url": "https://httpbin.org/html",
            "extract_relationships": True,
            "include_embeddings": False,
            "include_summary": True,
            "generate_graph_files": False,
            "export_formats": ["json"],
            "enable_rag_indexing": True,
            "rag_title": "Test URL Document",
            "rag_keywords": ["test", "url", "httpbin"],
            "rag_metadata": {"test": True, "source": "httpbin"}
        }

        response = await self.make_request('POST', f"{UNIFIED_URL}/analyze/unified", json=request_data)

        if response.status_code == 200:
            data = response.json()
            if data.get("success"):
                service_calls = data.get("service_calls", [])
                ner_analysis = data.get("ner_analysis", {})
                rag_document = data.get("rag_document", {})

                # Remember created artifacts so cleanup can delete them.
                if ner_analysis.get("analysis_id"):
                    self.created_analyses.append(ner_analysis["analysis_id"])
                if rag_document and rag_document.get("document_id"):
                    self.created_documents.append(rag_document["document_id"])

                message = f"Service calls: {', '.join(service_calls)}"
                message += f"\n   NER analysis ID: {ner_analysis.get('analysis_id', 'N/A')}"
                if rag_document:
                    message += f"\n   RAG document ID: {rag_document.get('document_id', 'N/A')}"

                # FIX: the original also computed has_rag_url but never used it;
                # the dead local has been removed.
                has_ner_url = "ner_url" in service_calls

                self.result.add_result(
                    "Unified Analysis (URL)",
                    has_ner_url and len(service_calls) > 0,
                    message,
                    data
                )
                return data
            else:
                self.result.add_result("Unified Analysis (URL)", False,
                                       data.get("error", "URL analysis failed"))
                return None
        else:
            self.result.add_result("Unified Analysis (URL)", False,
                                   f"HTTP {response.status_code}: {response.text[:200]}")
            return None
    except Exception as e:
        self.result.add_result("Unified Analysis (URL)", False, str(e))
        return None
+
async def test_combined_search(self):
    """Test 5: RAG search combined with per-result NER enrichment.

    Passes when the pipeline performed a "rag_search" call; zero results is
    only a warning (the index may simply be empty).
    """
    print("🔍 Test 5: Combined Search with NER Analysis")

    # Give the RAG service a moment to finish indexing earlier uploads.
    await asyncio.sleep(2)

    try:
        request_data = {
            "query": "investigation murder case",
            "limit": 5,
            "similarity_threshold": 0.1,  # low threshold -> more candidate hits
            "include_ner_analysis": True,
            "ner_export_formats": ["json"]
        }

        response = await self.make_request('POST', f"{UNIFIED_URL}/search/combined", json=request_data)

        if response.status_code == 200:
            data = response.json()
            if data.get("success"):
                service_calls = data.get("service_calls", [])
                search_results = data.get("search_results", {})
                results = search_results.get("results", [])
                ner_analyses = search_results.get("ner_analyses", [])

                message = f"Service calls: {', '.join(service_calls)}"
                message += f"\n   Search results: {len(results)}"
                message += f"\n   NER analyses: {len(ner_analyses)}"
                message += f"\n   Processing time: {data.get('processing_time', 0):.2f}s"

                # FIX: the original also computed has_ner_analysis from the
                # service calls but never used it; the dead local is removed.
                has_rag_search = "rag_search" in service_calls
                success = has_rag_search and len(service_calls) > 0

                if len(results) == 0:
                    self.result.add_warning(
                        "Combined Search",
                        "No search results found - may need more indexed content"
                    )

                self.result.add_result("Combined Search", success, message, data)
                return data
            else:
                self.result.add_result("Combined Search", False,
                                       data.get("error", "Search failed"))
                return None
        else:
            self.result.add_result("Combined Search", False,
                                   f"HTTP {response.status_code}: {response.text[:200]}")
            return None
    except Exception as e:
        self.result.add_result("Combined Search", False, str(e))
        return None
+
async def test_service_proxies(self):
    """Test 6: exercise the unified app's proxy routes to each backing service."""
    print("🔍 Test 6: Service Proxy Endpoints")

    proxy_tests = []

    # NER proxy: run a tiny text analysis through the unified app.
    try:
        ner_data = {
            "text": "Test entity recognition with John Smith working at Microsoft in Seattle.",
            "extract_relationships": True,
            "include_embeddings": False,
            "generate_graph_files": False
        }
        response = await self.make_request('POST', f"{UNIFIED_URL}/ner/analyze/text", json=ner_data)
        if response.status_code == 200:
            result = response.json()
            if result.get("success"):
                entities = result.get("entities", [])
                proxy_tests.append(("NER Proxy", True, f"Found {len(entities)} entities"))
                # Remember the analysis id so cleanup can delete it later.
                if result.get("analysis_id"):
                    self.created_analyses.append(result["analysis_id"])
            else:
                proxy_tests.append(("NER Proxy", False, "Analysis failed"))
        else:
            proxy_tests.append(("NER Proxy", False, f"HTTP {response.status_code}"))
    except Exception as e:
        proxy_tests.append(("NER Proxy", False, str(e)))

    # OCR proxy: health check only.
    try:
        response = await self.make_request('GET', f"{UNIFIED_URL}/ocr/health")
        if response.status_code == 200:
            proxy_tests.append(("OCR Proxy", True, "Health check passed"))
        else:
            proxy_tests.append(("OCR Proxy", False, f"HTTP {response.status_code}"))
    except Exception as e:
        proxy_tests.append(("OCR Proxy", False, str(e)))

    # RAG proxy: list a handful of documents.
    try:
        response = await self.make_request('GET', f"{UNIFIED_URL}/rag/documents?limit=5")
        if response.status_code == 200:
            result = response.json()
            documents = result.get("documents", [])
            proxy_tests.append(("RAG Proxy", True, f"Found {len(documents)} documents"))
        else:
            proxy_tests.append(("RAG Proxy", False, f"HTTP {response.status_code}"))
    except Exception as e:
        proxy_tests.append(("RAG Proxy", False, str(e)))

    passed_proxies = sum(1 for _, passed, _ in proxy_tests if passed)
    total_proxies = len(proxy_tests)

    for test_name, passed, message in proxy_tests:
        print(f"   {'✅' if passed else '❌'} {test_name}: {message}")

    self.result.add_result(
        "Service Proxies",
        passed_proxies == total_proxies,
        f"Proxies working: {passed_proxies}/{total_proxies}",
        {"proxy_results": proxy_tests}
    )

    return passed_proxies > 0
+
async def test_file_upload_unified(self):
    """Test 7: upload a text file through the unified NER proxy."""
    print("🔍 Test 7: File Upload through Unified Interface")

    try:
        # Synthetic document containing people, orgs, dates and money amounts.
        test_content = """
Technical Report: Advanced AI Systems

This report examines the integration of Named Entity Recognition (NER),
Optical Character Recognition (OCR), and Retrieval-Augmented Generation (RAG)
systems in a unified architecture.

Key Personnel:
- Dr. Alice Johnson, Lead AI Researcher at TechCorp
- Prof. Bob Smith, University of Technology
- Sarah Wilson, Data Scientist

Technical Components:
- Azure OpenAI for embeddings and language processing
- PostgreSQL with vector extensions for data storage
- FastAPI for microservice architecture

The system processes documents through multiple stages:
1. OCR extraction for scanned documents
2. NER analysis for entity and relationship extraction
3. RAG indexing for searchable knowledge base

Testing conducted on October 15, 2024 showed 95% accuracy.
Total budget: $250,000 for the complete implementation.
"""

        file_content = test_content.encode('utf-8')
        files = {"file": ("test_report.txt", io.BytesIO(file_content), "text/plain")}
        data = {
            "extract_relationships": "true",
            "include_embeddings": "false",
            "include_summary": "true",
            "generate_graph_files": "true",
            "export_formats": "neo4j,json"
        }

        response = await self.make_request(
            'POST',
            f"{UNIFIED_URL}/ner/analyze/file",
            files=files,
            data=data
        )

        if response.status_code == 200:
            result = response.json()
            if result.get("success"):
                entities = result.get("entities", [])
                relationships = result.get("relationships", [])

                # Remember the analysis id so cleanup can delete it later.
                if result.get("analysis_id"):
                    self.created_analyses.append(result["analysis_id"])

                message = "File processed successfully"
                message += f"\n   Entities: {len(entities)}"
                message += f"\n   Relationships: {len(relationships)}"
                message += f"\n   Language: {result.get('language', 'unknown')}"

                # Sanity-check that the expected entity classes were found.
                person_entities = [e for e in entities if e.get('label') == 'PERSON']
                org_entities = [e for e in entities if e.get('label') == 'ORGANIZATION']
                money_entities = [e for e in entities if e.get('label') == 'MONEY']

                message += f"\n   People found: {len(person_entities)}"
                message += f"\n   Organizations found: {len(org_entities)}"
                message += f"\n   Money amounts found: {len(money_entities)}"

                success = len(entities) > 0 and result.get("analysis_id")

                self.result.add_result("File Upload (Unified)", success, message, result)
                return result
            else:
                self.result.add_result("File Upload (Unified)", False,
                                       result.get("error", "File analysis failed"))
                return None
        else:
            self.result.add_result("File Upload (Unified)", False,
                                   f"HTTP {response.status_code}: {response.text[:200]}")
            return None
    except Exception as e:
        self.result.add_result("File Upload (Unified)", False, str(e))
        return None
+
async def test_service_discovery(self):
    """Test 8: the /services discovery endpoint must list ner, ocr and rag."""
    print("🔍 Test 8: Service Discovery and Listing")

    try:
        response = await self.make_request('GET', f"{UNIFIED_URL}/services")

        if response.status_code == 200:
            data = response.json()
            services = data.get("services", {})
            unified = data.get("unified", {})

            expected_services = ["ner", "ocr", "rag"]
            found_services = list(services.keys())

            message = f"Services discovered: {', '.join(found_services)}"
            message += f"\n   Unified endpoint: {unified.get('url', 'N/A')}"
            for service_name, service_info in services.items():
                endpoints = service_info.get("endpoints", [])
                message += f"\n   {service_name}: {len(endpoints)} endpoints"

            all_expected_found = all(name in found_services for name in expected_services)

            self.result.add_result("Service Discovery", all_expected_found, message, data)
            return data
        else:
            self.result.add_result("Service Discovery", False, f"HTTP {response.status_code}")
            return None
    except Exception as e:
        self.result.add_result("Service Discovery", False, str(e))
        return None
+
async def test_system_performance(self):
    """Test 9: fire concurrent NER requests and check throughput and success rate."""
    print("🔍 Test 9: System Performance and Reliability")

    try:
        test_texts = [
            "Performance test with Apple Inc and CEO Tim Cook in California.",
            "Reliability testing of Microsoft Azure services in Seattle.",
            "Load testing with Google Cloud Platform and AI systems."
        ]

        start_time = time.time()

        tasks = [
            self.make_request(
                'POST',
                f"{UNIFIED_URL}/ner/analyze/text",
                json={
                    "text": text,
                    "extract_relationships": True,
                    "include_embeddings": False,
                    "generate_graph_files": False
                }
            )
            for text in test_texts
        ]

        # Execute all requests concurrently; exceptions are returned, not raised.
        responses = await asyncio.gather(*tasks, return_exceptions=True)
        total_time = time.time() - start_time

        successful_requests = 0
        total_entities = 0

        for response in responses:
            if isinstance(response, Exception):
                continue
            if response.status_code == 200:
                result = response.json()
                if result.get("success"):
                    successful_requests += 1
                    total_entities += len(result.get("entities", []))
                    # Remember the analysis id so cleanup can delete it later.
                    if result.get("analysis_id"):
                        self.created_analyses.append(result["analysis_id"])

        avg_time_per_request = total_time / len(test_texts)

        message = f"Concurrent requests: {successful_requests}/{len(test_texts)} successful"
        message += f"\n   Total time: {total_time:.2f}s"
        message += f"\n   Avg time per request: {avg_time_per_request:.2f}s"
        message += f"\n   Total entities found: {total_entities}"

        performance_ok = (
            successful_requests >= len(test_texts) * 0.8 and  # 80% success rate
            avg_time_per_request < 10.0  # under 10 seconds per request
        )

        self.result.add_result(
            "System Performance",
            performance_ok,
            message,
            {
                "successful_requests": successful_requests,
                "total_requests": len(test_texts),
                "total_time": total_time,
                "avg_time_per_request": avg_time_per_request,
                "total_entities": total_entities
            }
        )

        return performance_ok

    except Exception as e:
        self.result.add_result("System Performance", False, str(e))
        return False
+
async def test_error_handling(self):
    """Test 10: verify the system rejects or gracefully fails on bad input."""
    print("🔍 Test 10: Error Handling and Resilience")

    error_tests = []

    # Case 1: structurally invalid unified-analysis payload.
    try:
        response = await self.make_request(
            'POST',
            f"{UNIFIED_URL}/analyze/unified",
            json={"invalid": "data"}
        )
        if response.status_code in [400, 422]:  # expected validation error
            error_tests.append(("Invalid Request Handling", True, "Properly rejected invalid data"))
        else:
            error_tests.append(("Invalid Request Handling", False,
                                f"Unexpected status: {response.status_code}"))
    except Exception as e:
        error_tests.append(("Invalid Request Handling", False, str(e)))

    # Case 2: empty text should be rejected up front or fail gracefully.
    try:
        response = await self.make_request(
            'POST',
            f"{UNIFIED_URL}/ner/analyze/text",
            json={"text": "", "extract_relationships": True}
        )
        if response.status_code in [400, 422]:  # expected validation error
            error_tests.append(("Empty Text Handling", True, "Properly rejected empty text"))
        else:
            result = response.json()
            if not result.get("success"):
                error_tests.append(("Empty Text Handling", True, "Failed gracefully"))
            else:
                error_tests.append(("Empty Text Handling", False, "Should have failed"))
    except Exception as e:
        error_tests.append(("Empty Text Handling", False, str(e)))

    # Case 3: an unreachable URL must not crash the service.
    try:
        response = await self.make_request(
            'POST',
            f"{UNIFIED_URL}/analyze/unified",
            json={
                "url": "https://invalid-url-that-does-not-exist-12345.com",
                "extract_relationships": True
            }
        )
        if response.status_code == 200:
            result = response.json()
            if not result.get("success"):
                error_tests.append(("Invalid URL Handling", True,
                                    "Failed gracefully with invalid URL"))
            else:
                error_tests.append(("Invalid URL Handling", False, "Should have failed"))
        else:
            error_tests.append(("Invalid URL Handling", True,
                                f"Rejected invalid URL (HTTP {response.status_code})"))
    except Exception as e:
        error_tests.append(("Invalid URL Handling", False, str(e)))

    passed_error_tests = sum(1 for _, passed, _ in error_tests if passed)
    total_error_tests = len(error_tests)

    for test_name, passed, message in error_tests:
        print(f"   {'✅' if passed else '❌'} {test_name}: {message}")

    self.result.add_result(
        "Error Handling",
        passed_error_tests >= total_error_tests * 0.8,  # 80% must pass
        f"Error tests passed: {passed_error_tests}/{total_error_tests}",
        {"error_test_results": error_tests}
    )

    return passed_error_tests > 0
+
async def cleanup_test_data(self):
    """Delete NER analyses and RAG documents created during the test run."""
    print("\n🧹 Cleaning up test data...")

    cleanup_count = 0
    cleanup_errors = 0

    # NER analyses are deleted directly against the NER service.
    for analysis_id in self.created_analyses:
        try:
            response = await self.make_request('DELETE', f"{NER_URL}/analysis/{analysis_id}")
            if response.status_code in [200, 404]:  # 404 == already deleted
                cleanup_count += 1
            else:
                cleanup_errors += 1
        except Exception as e:
            cleanup_errors += 1
            print(f"   ⚠️ Failed to cleanup analysis {analysis_id[:8]}...: {e}")

    # RAG documents go through the unified proxy.
    for document_id in self.created_documents:
        try:
            response = await self.make_request('DELETE', f"{UNIFIED_URL}/rag/documents/{document_id}")
            if response.status_code in [200, 404]:  # 404 == already deleted
                cleanup_count += 1
            else:
                cleanup_errors += 1
        except Exception as e:
            cleanup_errors += 1
            print(f"   ⚠️ Failed to cleanup document {document_id[:8]}...: {e}")

    if cleanup_count > 0:
        print(f"   ✅ Cleaned up {cleanup_count} test items")
    if cleanup_errors > 0:
        print(f"   ⚠️ Failed to cleanup {cleanup_errors} items")
+
async def run_comprehensive_tests(self):
    """Run every test in sequence, clean up created data, print the final report.

    Returns True only when every recorded test passed.
    """
    print("🚀 Unified AI Services - Comprehensive Test Suite")
    print("Testing: NER + OCR + RAG Integration with Unified Workflows")
    print("=" * 80)

    start_time = time.time()

    tests = [
        ("Unified App Health", self.test_unified_app_health),
        ("Individual Service Health", self.test_individual_service_health),
        ("Unified Analysis (Text)", self.test_unified_analysis_text),
        ("Unified Analysis (URL)", self.test_unified_analysis_url),
        ("Combined Search", self.test_combined_search),
        ("Service Proxies", self.test_service_proxies),
        ("File Upload (Unified)", self.test_file_upload_unified),
        ("Service Discovery", self.test_service_discovery),
        ("System Performance", self.test_system_performance),
        ("Error Handling", self.test_error_handling)
    ]

    for test_name, test_func in tests:
        print("\n" + "=" * 80)
        try:
            await test_func()
        except Exception as e:
            # A crashing test is recorded as a failure, not a suite abort.
            print(f"❌ {test_name} failed with exception: {e}")
            self.result.add_result(test_name, False, f"Exception: {e}")

    print("\n" + "=" * 80)
    await self.cleanup_test_data()

    total_time = time.time() - start_time
    print("\n" + "=" * 80)
    print("📊 UNIFIED SYSTEM COMPREHENSIVE TEST RESULTS")
    print("=" * 80)

    self.result.print_summary()

    print("\nTEST EXECUTION:")
    print(f"Total Time: {total_time:.2f} seconds")
    print(f"Tests Created: NER analyses: {len(self.created_analyses)}, "
          f"RAG documents: {len(self.created_documents)}")

    passed = self.result.passed_tests
    total = self.result.total_tests

    if passed == total:
        print("\n🎉 ALL UNIFIED SYSTEM TESTS PASSED!")
        print("✅ Unified application is fully operational")
        print("✅ All services are integrated and working")
        print("✅ Combined workflows are functional")
        print("✅ Service proxies are working")
        print("✅ Error handling is robust")

        print("\n🎯 UNIFIED SYSTEM CAPABILITIES VERIFIED:")
        print("   • NER + OCR + RAG service integration")
        print("   • Unified analysis workflows")
        print("   • Combined search with NER enhancement")
        print("   • Service proxy functionality")
        print("   • Multi-language support")
        print("   • Concurrent request handling")
        print("   • Comprehensive error handling")
        print("   • Real-time service health monitoring")
    else:
        print("\n⚠️ SOME UNIFIED SYSTEM TESTS FAILED")
        print(f"❌ {self.result.failed_tests} out of {total} tests failed")

        print("\n🔧 TROUBLESHOOTING STEPS:")
        print("1. Check that all services are running:")
        print(f"   • NER Service: {NER_URL}/health")
        print(f"   • OCR Service: {OCR_URL}/health")
        print(f"   • RAG Service: {RAG_URL}/health")
        print(f"   • Unified App: {UNIFIED_URL}/health")
        print("2. Verify configuration in .env file")
        print("3. Check service logs for errors")
        print("4. Ensure all dependencies are installed")
        print("5. Verify database connectivity")

    return passed == total
+
async def main():
    """Main test runner: optionally override UNIFIED_URL from argv[1], then run.

    Exits the process with status 0 on full success, 1 otherwise.
    """
    # BUG FIX: 'global' must be declared before any use of the name inside the
    # function. The original read UNIFIED_URL (in the else branch) *before*
    # the 'global UNIFIED_URL' statement, which raises
    # "SyntaxError: name 'UNIFIED_URL' is used prior to global declaration".
    global UNIFIED_URL

    if len(sys.argv) > 1:
        unified_url = sys.argv[1]
    else:
        unified_url = UNIFIED_URL

    UNIFIED_URL = unified_url

    print("🧪 Unified AI Services - Comprehensive Test Suite")
    print(f"📡 Testing unified system at: {UNIFIED_URL}")
    print("📋 Expected services:")
    print(f"   • NER Service: {NER_URL}")
    print(f"   • OCR Service: {OCR_URL}")
    print(f"   • RAG Service: {RAG_URL}")
    print(f"   • Unified App: {UNIFIED_URL}")

    print("\nMake sure the unified application is running before starting tests.")
    print("Start command: python app.py")

    # Wait for explicit user confirmation before hammering the services.
    input("\nPress Enter to start unified system tests...")

    async with UnifiedSystemTester() as tester:
        success = await tester.run_comprehensive_tests()

        if success:
            print("\n🎉 UNIFIED SYSTEM VERIFICATION COMPLETE!")
            print("✅ All services are integrated and operational")
            print("✅ Combined workflows are working perfectly")
            print("✅ Ready for production deployment")
            sys.exit(0)
        else:
            print("\n🔧 UNIFIED SYSTEM NEEDS ATTENTION")
            print("❌ Some functionality is not working correctly")
            print("📋 Review the test results above for specific issues")
            sys.exit(1)
+
if __name__ == "__main__":
    # Script entry point: launch the asynchronous test runner.
    asyncio.run(main())