Chirapath committed on
Commit
963ae98
Β·
verified Β·
1 Parent(s): 463f8b6

First draft coding project

Browse files
Files changed (10) hide show
  1. .env +120 -0
  2. README.md +470 -12
  3. app.py +808 -0
  4. configs.py +372 -0
  5. demo.py +527 -0
  6. gettingstart.md +485 -0
  7. manage_services.py +550 -0
  8. requirements.txt +45 -0
  9. setup.py +511 -0
  10. test.py +1055 -0
.env ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# =================================================================
# OCR SERVICE CONFIGURATION
# =================================================================
# SECURITY WARNING: this .env file with live API keys and passwords was
# committed to version control. Rotate every credential below and keep
# secrets out of the repository (use a secrets manager or an untracked
# local .env file).
# Get these from your Azure Portal -> Document Intelligence resource
5
+ AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://ocrservice256807.cognitiveservices.azure.com/
6
+ AZURE_DOCUMENT_INTELLIGENCE_KEY=3HVIJlvMH1AF5wuNSv0w1qd43AejgulvtdFInpFGJambLtr0DvISJQQJ99BGACqBBLyXJ3w3AAALACOG4NKs
7
+
8
+ # Server Configuration (Optional)
9
+ OCR_HOST=0.0.0.0
10
+ OCR_PORT=8400
11
+ OCR_DEBUG=True
12
+ OCR_LOG_LEVEL=INFO
13
+
14
+ # # CORS Configuration (Optional - for production)
15
+ # ALLOWED_ORIGINS=["http://localhost:3000", "https://yourdomain.com"]
16
+
17
+ # Rate Limiting (Optional - for production)
18
+ RATE_LIMIT_REQUESTS=100
19
+ RATE_LIMIT_WINDOW=3600
20
+
21
+ # Web Scraping Configuration (Optional)
22
+ MAX_IMAGES_PER_PAGE=10
23
+ REQUEST_TIMEOUT=30
24
+ USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
25
+
26
+ # ================================================================
27
+ # RAG SERVICE CONFIGURATION
28
+ # ================================================================
29
+
30
+ AZURE_OPENAI_ENDPOINT=https://ai-models-service256807.cognitiveservices.azure.com/
31
+ AZURE_OPENAI_KEY=3rz1lDxopNZktRyKpRbclNb8Evp5l3sgGy7ITNRgBdhKr1pGkY3OJQQJ99BGACYeBjFXJ3w3AAAAACOGcIO1
32
+ AZURE_OPENAI_DEPLOYMENT=text-embedding-3-small
33
+ AZURE_OPENAI_API_VERSION=2024-12-01-preview
34
+
35
+ PG_HOST=sbaipocpostgresql.postgres.database.azure.com
36
+ PG_PORT=5432
37
+ PG_DATABASE=vectorsearch
38
+ PG_USER=user
39
+ PG_PASSWORD="P@ssw0rd"
40
+ PG_SSL_MODE=require
41
+
42
+ OCR_SERVICE_URL=http://localhost:8400
43
+
44
+ RAG_HOST=0.0.0.0
45
+ RAG_PORT=8401
46
+ RAG_DEBUG=True
47
+ RAG_LOG_LEVEL=INFO
48
+
49
+ CHUNK_SIZE=1536
50
+ CHUNK_OVERLAP=100
51
+ MIN_CHUNK_SIZE=200
52
+
53
+ ALLOWED_ORIGINS=*
54
+
55
+ DEFAULT_SEARCH_LIMIT=10
56
+ DEFAULT_SIMILARITY_THRESHOLD=0.5
57
+ MAX_SEARCH_RESULTS=100
58
+
59
+ # Database connection pooling
60
+ DB_POOL_MIN_SIZE=2
61
+ DB_POOL_MAX_SIZE=20
62
+ DB_COMMAND_TIMEOUT=60
63
+
64
+ # Request timeouts (seconds)
65
+ REQUEST_TIMEOUT=30
66
+ EMBEDDING_TIMEOUT=60
67
+
68
+ SERVICE_VERSION=1.0.0
69
+ RAG_SERVICE_URL=http://localhost:8401
70
+ TEST_TIMEOUT=30
71
+
72
+ # =================================================================
73
+ # NER SERVICE CONFIGURATION
74
+ # =================================================================
75
+
76
+ # Server Configuration
77
+ NER_HOST=0.0.0.0
78
+ NER_PORT=8500
79
+ DEBUG=True
80
+ NER_LOG_LEVEL=INFO
81
+
82
+ # OCR Service Configuration (from your existing OCR service)
83
+ OCR_SERVICE_URL=http://localhost:8400
84
+
85
+ # DeepSeek API Configuration
86
+ # Get these from your Azure AI service or DeepSeek API
87
+ DEEPSEEK_ENDPOINT=https://ai-models-service256807.services.ai.azure.com/models
88
+ DEEPSEEK_API_KEY=3rz1lDxopNZktRyKpRbclNb8Evp5l3sgGy7ITNRgBdhKr1pGkY3OJQQJ99BGACYeBjFXJ3w3AAAAACOGcIO1
89
+ DEEPSEEK_MODEL=DeepSeek-R1-0528
90
+
91
+ # Azure OpenAI Configuration (for embeddings)
92
+ # Get these from your Azure OpenAI resource
93
+ AZURE_OPENAI_ENDPOINT=https://openaiservice2568.openai.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2023-05-15
94
+ AZURE_OPENAI_API_KEY=8CZSXFphWviu1KBpweiUntRKrJgYR2hApSUT76f5MlBsSjuvKulnJQQJ99BCACYeBjFXJ3w3AAABACOGc2vU
95
+ EMBEDDING_MODEL=text-embedding-3-large
96
+
97
+ # Azure Storage Configuration (SAS Authentication)
98
+ # Option 1: Use Storage Account URL + SAS Token (Recommended)
99
+ AZURE_STORAGE_ACCOUNT_URL=https://historylog256807.blob.core.windows.net/
100
+ AZURE_BLOB_SAS_TOKEN="sp=racwdli&st=2025-07-07T09:05:50Z&se=2026-07-06T17:05:50Z&sv=2024-11-04&sr=c&sig=TKDn8t6QLFnO70bstW%2FH%2FjrYGczNnFyOap1qG9RTPEU%3D"
101
+
102
+ # Option 2: Use complete SAS URL (Alternative - leave blank if using Option 1)
103
+ #AZURE_BLOB_SAS_URL=https://historylog256807.blob.core.windows.net/historylog?sp=racwdli&st=2025-07-07T09:05:50Z&se=2026-07-06T17:05:50Z&sv=2024-11-04&sr=c&sig=TKDn8t6QLFnO70bstW%2FH%2FjrYGczNnFyOap1qG9RTPEU%3D
104
+
105
+ BLOB_CONTAINER=historylog
106
+
107
+ # PostgreSQL Configuration (Azure Database for PostgreSQL flexible server)
108
+ POSTGRES_HOST=sbaipocpostgresql.postgres.database.azure.com
109
+ POSTGRES_PORT=5432
110
+ POSTGRES_USER=user
111
+ POSTGRES_PASSWORD="P@ssw0rd"
112
+ POSTGRES_DATABASE=postgres
113
+ AZURE_OPENAI_DEPLOYMENT_NAME=text-embedding-3-large
114
+
115
+ # Processing Configuration
116
+ MAX_FILE_SIZE=50 # Maximum file size in MB
117
+ REQUEST_TIMEOUT=300 # Request timeout in seconds
118
+
119
+ # CORS Configuration (optional)
120
+ ALLOWED_ORIGINS=*
README.md CHANGED
@@ -1,12 +1,470 @@
1
- ---
2
- title: SB PoC
3
- emoji: πŸ“š
4
- colorFrom: blue
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.35.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Unified AI Services
2
+
3
+ A comprehensive AI platform that integrates Named Entity Recognition (NER), Optical Character Recognition (OCR), and Retrieval-Augmented Generation (RAG) services into a unified application.
4
+
5
+ ## 🌟 Features
6
+
7
+ ### Core Services
8
+ - **NER Service** (Port 8500): Advanced named entity recognition with relationship extraction
9
+ - **OCR Service** (Port 8400): Document processing with Azure Document Intelligence
10
+ - **RAG Service** (Port 8401): Vector search and document retrieval
11
+ - **Unified App** (Port 8000): Coordinated workflows and service management
12
+
13
+ ### Key Capabilities
14
+ - βœ… Multi-language support (Thai + English)
15
+ - βœ… Complex relationship extraction
16
+ - βœ… Entity deduplication
17
+ - βœ… Graph database exports (Neo4j, GraphML, GEXF)
18
+ - βœ… Vector search with semantic similarity
19
+ - βœ… Document processing (PDF, images, text)
20
+ - βœ… Real-time service health monitoring
21
+ - βœ… Unified workflows combining all services
22
+ - βœ… Comprehensive API documentation
23
+
24
+ ## πŸš€ Quick Start
25
+
26
+ ### Prerequisites
27
+ - Python 3.8 or higher
28
+ - PostgreSQL with vector extension support
29
+ - Azure OpenAI account
30
+ - Azure Document Intelligence account
31
+ - DeepSeek API account (for advanced NER)
32
+
33
+ ### Automated Setup
34
+
35
+ 1. **Clone and navigate to the project directory**
36
+ ```bash
37
+ cd unified-ai-services
38
+ ```
39
+
40
+ 2. **Run the automated setup**
41
+ ```bash
42
+ python setup.py
43
+ ```
44
+
45
+ This will:
46
+ - Check your Python environment
47
+ - Create necessary directories
48
+ - Help you configure .env file
49
+ - Install dependencies
50
+ - Validate configuration
51
+ - Create startup scripts
52
+
53
+ 3. **Start the unified application**
54
+ ```bash
55
+ python app.py
56
+ ```
57
+
58
+ Or use the generated scripts:
59
+ - Windows: `start_services.bat`
60
+ - Unix/Linux/Mac: `./start_services.sh`
61
+
62
+ 4. **Run comprehensive tests**
63
+ ```bash
64
+ python test_unified.py
65
+ ```
66
+
67
+ Or use the generated scripts:
68
+ - Windows: `run_tests.bat`
69
+ - Unix/Linux/Mac: `./run_tests.sh`
70
+
71
+ ### Manual Setup
72
+
73
+ If you prefer manual setup:
74
+
75
+ 1. **Install dependencies**
76
+ ```bash
77
+ pip install -r requirements.txt
78
+ ```
79
+
80
+ 2. **Create .env file** (copy from .env.example)
81
+ ```bash
82
+ cp .env.example .env
83
+ # Edit .env with your configuration
84
+ ```
85
+
86
+ 3. **Set up directories**
87
+ ```bash
88
+ mkdir -p services exports logs temp tests data
89
+ ```
90
+
91
+ 4. **Place service files in the services directory**
92
+ ```
93
+ services/
94
+ β”œβ”€β”€ ner_service.py
95
+ β”œβ”€β”€ ocr_service.py
96
+ └── rag_service.py
97
+ ```
98
+
99
+ ## πŸ“ Project Structure
100
+
101
+ ```
102
+ unified-ai-services/
103
+ β”œβ”€β”€ app.py # Main unified application
104
+ β”œβ”€β”€ configs.py # Centralized configuration
105
+ β”œβ”€β”€ setup.py # Automated setup script
106
+ β”œβ”€β”€ requirements.txt # Python dependencies
107
+ β”œβ”€β”€ test_unified.py # Comprehensive test suite
108
+ β”œβ”€β”€ .env # Environment configuration
109
+ β”œβ”€β”€ services/ # Individual service files
110
+ β”‚ β”œβ”€β”€ ner_service.py # NER service implementation
111
+ β”‚ β”œβ”€β”€ ocr_service.py # OCR service implementation
112
+ β”‚ └── rag_service.py # RAG service implementation
113
+ β”œβ”€β”€ exports/ # Generated export files
114
+ β”œβ”€β”€ logs/ # Application logs
115
+ β”œβ”€β”€ temp/ # Temporary files
116
+ β”œβ”€β”€ tests/ # Additional test files
117
+ └── data/ # Data files
118
+ ```
119
+
120
+ ## βš™οΈ Configuration
121
+
122
+ ### Environment Variables
123
+
124
+ The system uses a `.env` file for configuration. Key variables include:
125
+
126
+ #### Server Configuration
127
+ ```bash
128
+ HOST=0.0.0.0
129
+ DEBUG=True
130
+ MAIN_PORT=8000
131
+ NER_PORT=8500
132
+ OCR_PORT=8400
133
+ RAG_PORT=8401
134
+ ```
135
+
136
+ #### Database Configuration
137
+ ```bash
138
+ POSTGRES_HOST=your-postgres-server.com
139
+ POSTGRES_PORT=5432
140
+ POSTGRES_USER=your-username
141
+ POSTGRES_PASSWORD=your-password
142
+ POSTGRES_DATABASE=postgres
143
+ ```
144
+
145
+ #### Azure OpenAI Configuration
146
+ ```bash
147
+ AZURE_OPENAI_ENDPOINT=https://your-openai.openai.azure.com/
148
+ AZURE_OPENAI_API_KEY=your-api-key
149
+ EMBEDDING_MODEL=text-embedding-3-large
150
+ ```
151
+
152
+ #### DeepSeek Configuration
153
+ ```bash
154
+ DEEPSEEK_ENDPOINT=https://your-deepseek-endpoint/
155
+ DEEPSEEK_API_KEY=your-deepseek-key
156
+ DEEPSEEK_MODEL=DeepSeek-R1-0528
157
+ ```
158
+
159
+ #### Azure Document Intelligence Configuration
160
+ ```bash
161
+ AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://your-di.cognitiveservices.azure.com/
162
+ AZURE_DOCUMENT_INTELLIGENCE_KEY=your-di-key
163
+ ```
164
+
165
+ #### Azure Storage Configuration
166
+ ```bash
167
+ AZURE_STORAGE_ACCOUNT_URL=https://yourstorage.blob.core.windows.net/
168
+ AZURE_BLOB_SAS_TOKEN=your-sas-token
169
+ BLOB_CONTAINER=historylog
170
+ ```
171
+
172
+ ## πŸ”§ API Documentation
173
+
174
+ Once running, access the interactive API documentation:
175
+ - **Unified API**: http://localhost:8000/docs
176
+ - **NER Service**: http://localhost:8500/docs
177
+ - **OCR Service**: http://localhost:8400/docs
178
+ - **RAG Service**: http://localhost:8401/docs
179
+
180
+ ## 🎯 API Usage Examples
181
+
182
+ ### 1. Unified Analysis (Text + RAG Indexing)
183
+
184
+ ```python
185
+ import httpx
186
+
187
+ async def unified_analysis():
188
+ data = {
189
+ "text": "Your text content here...",
190
+ "extract_relationships": True,
191
+ "include_embeddings": False,
192
+ "generate_graph_files": True,
193
+ "export_formats": ["neo4j", "json"],
194
+ "enable_rag_indexing": True,
195
+ "rag_title": "My Document",
196
+ "rag_keywords": ["keyword1", "keyword2"]
197
+ }
198
+
199
+ async with httpx.AsyncClient() as client:
200
+ response = await client.post("http://localhost:8000/analyze/unified", json=data)
201
+ return response.json()
202
+ ```
203
+
204
+ ### 2. Combined Search with NER Analysis
205
+
206
+ ```python
207
+ async def combined_search():
208
+ data = {
209
+ "query": "search query here",
210
+ "limit": 10,
211
+ "similarity_threshold": 0.2,
212
+ "include_ner_analysis": True
213
+ }
214
+
215
+ async with httpx.AsyncClient() as client:
216
+ response = await client.post("http://localhost:8000/search/combined", json=data)
217
+ return response.json()
218
+ ```
219
+
220
+ ### 3. File Upload Analysis
221
+
222
+ ```python
223
+ async def analyze_file():
224
+ files = {"file": ("document.pdf", open("document.pdf", "rb"), "application/pdf")}
225
+ data = {
226
+ "extract_relationships": "true",
227
+ "generate_graph_files": "true",
228
+ "export_formats": "neo4j,json"
229
+ }
230
+
231
+ async with httpx.AsyncClient() as client:
232
+ response = await client.post("http://localhost:8000/ner/analyze/file", files=files, data=data)
233
+ return response.json()
234
+ ```
235
+
236
+ ## πŸ§ͺ Testing
237
+
238
+ ### Comprehensive Test Suite
239
+
240
+ The project includes comprehensive tests covering:
241
+ - βœ… Service health checks
242
+ - βœ… Individual service functionality
243
+ - βœ… Unified workflow testing
244
+ - βœ… Service proxy functionality
245
+ - βœ… Error handling and resilience
246
+ - βœ… Performance testing
247
+ - βœ… File upload/download testing
248
+
249
+ Run tests with:
250
+ ```bash
251
+ python test_unified.py
252
+ ```
253
+
254
+ ### Individual Service Tests
255
+
256
+ Test individual services:
257
+ ```bash
258
+ # Test NER service
259
+ python test_ner.py
260
+
261
+ # Test RAG service
262
+ python test_rag.py
263
+ ```
264
+
265
+ ### Quick Health Check
266
+
267
+ ```bash
268
+ curl http://localhost:8000/health
269
+ ```
270
+
271
+ ## πŸ” Monitoring and Health Checks
272
+
273
+ ### Health Endpoints
274
+ - **Unified System**: `GET /health`
275
+ - **Individual Services**: `GET /ner/health`, `GET /ocr/health`, `GET /rag/health`
276
+ - **Detailed Status**: `GET /status`
277
+ - **Service Discovery**: `GET /services`
278
+
279
+ ### Monitoring Features
280
+ - Real-time service health monitoring
281
+ - Response time tracking
282
+ - Service uptime monitoring
283
+ - Error rate tracking
284
+ - Resource usage monitoring
285
+
286
+ ## πŸ“Š Service Architecture
287
+
288
+ ```mermaid
289
+ graph TB
290
+ Client[Client Applications]
291
+
292
+ subgraph "Unified AI Services (Port 8000)"
293
+ UA[Unified App]
294
+ Proxy[Service Proxies]
295
+ Health[Health Monitor]
296
+ end
297
+
298
+ subgraph "Core Services"
299
+ NER[NER Service<br/>Port 8500]
300
+ OCR[OCR Service<br/>Port 8400]
301
+ RAG[RAG Service<br/>Port 8401]
302
+ end
303
+
304
+ subgraph "External Services"
305
+ Azure[Azure Services]
306
+ DeepSeek[DeepSeek API]
307
+ DB[(PostgreSQL)]
308
+ end
309
+
310
+ Client --> UA
311
+ UA --> Proxy
312
+ Proxy --> NER
313
+ Proxy --> OCR
314
+ Proxy --> RAG
315
+
316
+ NER --> Azure
317
+ NER --> DeepSeek
318
+ NER --> DB
319
+
320
+ OCR --> Azure
321
+
322
+ RAG --> Azure
323
+ RAG --> DB
324
+ RAG --> OCR
325
+ ```
326
+
327
+ ## πŸ› οΈ Development
328
+
329
+ ### Adding New Features
330
+
331
+ 1. **Service Modifications**: Update individual service files in `services/`
332
+ 2. **Unified Workflows**: Modify `app.py` for new combined workflows
333
+ 3. **Configuration**: Update `configs.py` for new settings
334
+ 4. **Tests**: Add tests to `test_unified.py`
335
+
336
+ ### Debugging
337
+
338
+ 1. **Check Service Logs**: Services log to console
339
+ 2. **Health Checks**: Use `/health` endpoints
340
+ 3. **Configuration**: Run `python configs.py` to validate
341
+ 4. **Database**: Check PostgreSQL connectivity
342
+ 5. **Azure Services**: Verify API keys and endpoints
343
+
344
+ ### Service Management
345
+
346
+ Start individual services for development:
347
+ ```bash
348
+ # Start NER service only
349
+ cd services && python ner_service.py
350
+
351
+ # Start OCR service only
352
+ cd services && python ocr_service.py
353
+
354
+ # Start RAG service only
355
+ cd services && python rag_service.py
356
+ ```
357
+
358
+ ## 🚨 Troubleshooting
359
+
360
+ ### Common Issues
361
+
362
+ #### 1. Services Won't Start
363
+ - Check port availability: `netstat -an | grep :8000`
364
+ - Verify Python dependencies: `pip list`
365
+ - Check .env configuration: `python configs.py`
366
+
367
+ #### 2. Database Connection Issues
368
+ - Verify PostgreSQL is running
369
+ - Check connection string in .env
370
+ - Test connectivity: `python -c "import asyncio, asyncpg; asyncio.run(asyncpg.connect('your-connection-string'))"`
371
+
372
+ #### 3. Azure Service Issues
373
+ - Verify API keys and endpoints
374
+ - Check Azure service status
375
+ - Review rate limits and quotas
376
+
377
+ #### 4. Performance Issues
378
+ - Monitor resource usage: `top` or Task Manager
379
+ - Check database performance
380
+ - Review log files for errors
381
+
382
+ ### Error Codes
383
+
384
+ - **500**: Internal service error
385
+ - **503**: Service unavailable
386
+ - **400**: Bad request (check input data)
387
+ - **422**: Validation error
388
+ - **404**: Endpoint not found
389
+
390
+ ## πŸ“ˆ Performance Optimization
391
+
392
+ ### Recommended Settings
393
+
394
+ #### Production Configuration
395
+ ```bash
396
+ DEBUG=False
397
+ MAX_FILE_SIZE=50
398
+ REQUEST_TIMEOUT=300
399
+ CHUNK_SIZE=1000
400
+ CHUNK_OVERLAP=200
401
+ ```
402
+
403
+ #### Database Optimization
404
+ - Use connection pooling
405
+ - Configure appropriate indexes
406
+ - Monitor query performance
407
+ - Regular maintenance
408
+
409
+ #### Service Optimization
410
+ - Enable caching where appropriate
411
+ - Use async operations
412
+ - Optimize batch processing
413
+ - Monitor memory usage
414
+
415
+ ## πŸ” Security Considerations
416
+
417
+ ### API Security
418
+ - Implement authentication/authorization as needed
419
+ - Use HTTPS in production
420
+ - Validate all input data
421
+ - Rate limiting
422
+
423
+ ### Data Security
424
+ - Secure database connections (SSL)
425
+ - Encrypt sensitive data
426
+ - Regular security updates
427
+ - Monitor access logs
428
+
429
+ ### Azure Security
430
+ - Rotate API keys regularly
431
+ - Use managed identities where possible
432
+ - Monitor usage and costs
433
+ - Follow Azure security best practices
434
+
435
+ ## πŸ“ License
436
+
437
+ This project is licensed under the MIT License - see the LICENSE file for details.
438
+
439
+ ## 🀝 Contributing
440
+
441
+ 1. Fork the repository
442
+ 2. Create a feature branch
443
+ 3. Make your changes
444
+ 4. Add tests for new functionality
445
+ 5. Run the test suite
446
+ 6. Submit a pull request
447
+
448
+ ## πŸ“ž Support
449
+
450
+ For support and questions:
451
+ 1. Check this README for common issues
452
+ 2. Review the test suite for usage examples
453
+ 3. Check service logs for error details
454
+ 4. Verify configuration with `python configs.py`
455
+
456
+ ## 🎯 Roadmap
457
+
458
+ ### Current Version (1.0.0)
459
+ - βœ… Unified service integration
460
+ - βœ… Comprehensive testing
461
+ - βœ… Multi-language support
462
+ - βœ… Graph database exports
463
+
464
+ ### Future Enhancements
465
+ - πŸ”„ Advanced caching mechanisms
466
+ - πŸ”„ Enhanced monitoring and analytics
467
+ - πŸ”„ Additional export formats
468
+ - πŸ”„ Improved error recovery
469
+ - πŸ”„ Performance optimizations
470
+ - πŸ”„ Additional language support
app.py ADDED
@@ -0,0 +1,808 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified AI Services Application
4
+ Coordinates NER, OCR, and RAG services with combined workflows
5
+ """
6
+
7
+ import asyncio
8
+ import subprocess
9
+ import signal
10
+ import sys
11
+ import os
12
+ import time
13
+ import json
14
+ import logging
15
+ from pathlib import Path
16
+ from typing import Dict, List, Optional, Any, Union
17
+ from contextlib import asynccontextmanager
18
+ from datetime import datetime
19
+ import tempfile
20
+ import io
21
+
22
+ import httpx
23
+ import uvicorn
24
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form, BackgroundTasks, Query
25
+ from fastapi.middleware.cors import CORSMiddleware
26
+ from fastapi.responses import FileResponse, StreamingResponse
27
+ from pydantic import BaseModel, HttpUrl
28
+ import psutil
29
+
30
+ # Import our configuration
31
+ from configs import get_config, validate_environment
32
+
33
+ # Get configuration
34
config = get_config()

# Root logging configuration shared by the unified app and its service proxies.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Child processes spawned by start_service(), keyed by service name
# ("ner" / "ocr" / "rag").  Consulted by get_service_status() and
# stop_all_services().
service_processes: Dict[str, subprocess.Popen] = {}
# Last observed health-check result per service, updated by start_service().
service_health: Dict[str, bool] = {}
46
+
47
+ # Pydantic Models for Unified API
48
class ServiceStatus(BaseModel):
    """Health/status snapshot for a single backing service (see get_service_status)."""

    name: str  # service key: "ner", "ocr", or "rag"
    status: str  # "running" if the health probe passed, otherwise "down"
    port: int  # local port the service listens on
    health: bool  # result of the most recent /health probe
    uptime: Optional[float] = None  # seconds since process start; None if not managed here
    response_time: Optional[float] = None  # health-probe round trip, in seconds
55
+
56
class UnifiedAnalysisRequest(BaseModel):
    """Request body for the unified NER + optional RAG-indexing workflow.

    Exactly one of ``text`` or ``url`` is expected as the input source
    (not enforced here — presumably validated by the endpoint; confirm).
    """

    text: Optional[str] = None  # raw text to analyze
    url: Optional[HttpUrl] = None  # alternatively, a URL to fetch and analyze
    extract_relationships: bool = True  # run relationship extraction in the NER pass
    include_embeddings: bool = True
    include_summary: bool = True
    generate_graph_files: bool = True  # produce downloadable graph exports
    export_formats: List[str] = ["neo4j", "json", "graphml"]
    enable_rag_indexing: bool = False  # also index the document in the RAG service
    rag_title: Optional[str] = None  # title to store with the indexed document
    rag_keywords: Optional[List[str]] = None
    rag_metadata: Optional[Dict[str, Any]] = None  # free-form metadata passed to RAG
68
+
69
class CombinedSearchRequest(BaseModel):
    """Request body for RAG vector search optionally enriched with NER analysis."""

    query: str  # natural-language search query
    limit: int = 10  # maximum number of search hits to return
    similarity_threshold: float = 0.2  # minimum cosine-similarity score for a hit
    include_ner_analysis: bool = True  # run NER over the retrieved results
    ner_export_formats: List[str] = ["json"]
75
+
76
class UnifiedResponse(BaseModel):
    """Envelope returned by the unified endpoints; optional sections are
    populated only when the corresponding service was invoked."""

    success: bool
    service_calls: List[str]  # names of backing services invoked for this request
    ner_analysis: Optional[Dict[str, Any]] = None  # raw NER service payload
    rag_document: Optional[Dict[str, Any]] = None  # RAG indexing result, if enabled
    search_results: Optional[Dict[str, Any]] = None  # RAG search payload, if searched
    processing_time: float  # total wall-clock time in seconds
    error: Optional[str] = None  # populated when success is False
84
+
85
+ # Service Management Functions
86
async def start_service(service_name: str, script_path: str, port: int) -> bool:
    """Launch *script_path* as a child process and wait for it to become healthy.

    Args:
        service_name: Key used to register the process in ``service_processes``.
        script_path: Path to the service's entry-point script.
        port: TCP port the service is expected to serve ``/health`` on.

    Returns:
        True if the service answers its health check within ~30 s, or if the
        port was already occupied (an external instance is assumed to be
        running); False on spawn failure or health-check timeout.
    """
    try:
        logger.info(f"🚀 Starting {service_name} service on port {port}")

        # If something already listens on the port, assume an external copy
        # of the service is running and do not spawn a duplicate.
        if is_port_in_use(port):
            logger.warning(f"Port {port} is already in use. Assuming {service_name} is already running.")
            return True

        # Spawn in its own process group so stop_all_services() can signal
        # the whole group (CTRL_BREAK_EVENT on Windows, SIGTERM elsewhere).
        if sys.platform == "win32":
            process = subprocess.Popen(
                [sys.executable, script_path],
                creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
            )
        else:
            process = subprocess.Popen(
                [sys.executable, script_path],
                preexec_fn=os.setsid,
            )

        service_processes[service_name] = process

        # Poll the health endpoint once per second for up to 30 seconds.
        for _ in range(30):
            await asyncio.sleep(1)
            if await check_service_health(service_name, port):
                logger.info(f"✅ {service_name} service started successfully")
                service_health[service_name] = True
                return True

        logger.error(f"❌ {service_name} service failed to start within timeout")
        # Record the failure so health reporting reflects the timed-out start
        # (the original left the entry unset on this path).
        service_health[service_name] = False
        return False

    except Exception as e:
        logger.error(f"❌ Failed to start {service_name} service: {e}")
        return False
122
+
123
def is_port_in_use(port: int) -> bool:
    """Return True if any local socket is bound to *port*.

    ``psutil.net_connections()`` can require elevated privileges on some
    platforms (e.g. macOS); on failure we conservatively report the port
    as free so service startup can proceed.
    """
    try:
        # conn.laddr is an empty tuple for sockets with no local address,
        # so guard before reading .port — the original's unguarded access
        # raised AttributeError, which the bare except turned into a false
        # "port free" answer for the whole scan.
        return any(
            conn.laddr and conn.laddr.port == port
            for conn in psutil.net_connections()
        )
    except (psutil.Error, OSError):
        return False
132
+
133
async def check_service_health(service_name: str, port: int) -> bool:
    """Return True if ``GET http://localhost:<port>/health`` answers 200.

    Any connection error or timeout counts as unhealthy.  (The original
    used a bare ``except:``, which also swallowed KeyboardInterrupt and
    SystemExit — narrowed to httpx's error hierarchy.)
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"http://localhost:{port}/health",
                timeout=5.0
            )
            return response.status_code == 200
    except httpx.HTTPError:
        return False
144
+
145
async def get_service_status(service_name: str, port: int) -> ServiceStatus:
    """Probe one service's /health endpoint and build a ServiceStatus snapshot.

    ``response_time`` measures the health probe's round trip; ``uptime`` is
    derived from the child process's create time when this app spawned it,
    and is None for externally-started services.
    """
    start_time = time.time()
    health = await check_service_health(service_name, port)
    response_time = time.time() - start_time

    uptime = None
    if service_name in service_processes:
        process = service_processes[service_name]
        if process.poll() is None:  # Process is running
            try:
                uptime = time.time() - psutil.Process(process.pid).create_time()
            except (psutil.Error, OSError):
                # Process vanished or access denied between poll() and the
                # psutil lookup — report unknown uptime rather than crash.
                uptime = None

    return ServiceStatus(
        name=service_name,
        status="running" if health else "down",
        port=port,
        health=health,
        uptime=uptime,
        response_time=response_time
    )
168
+
169
async def stop_all_services():
    """Gracefully stop every child process in ``service_processes``.

    Each service gets a group-wide termination signal, then up to 10 s to
    exit before being force-killed.  Errors stopping one service are logged
    and do not prevent the others from being stopped.
    """
    logger.info("🛑 Stopping all services...")

    for service_name, process in service_processes.items():
        try:
            if process.poll() is None:  # Process is running
                logger.info(f"Stopping {service_name}...")

                # Signal the whole process group created in start_service():
                # CTRL_BREAK_EVENT on Windows, SIGTERM to the group elsewhere.
                if sys.platform == "win32":
                    process.send_signal(signal.CTRL_BREAK_EVENT)
                else:
                    os.killpg(os.getpgid(process.pid), signal.SIGTERM)

                # Wait for graceful shutdown; escalate to SIGKILL on timeout.
                try:
                    process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    logger.warning(f"Force killing {service_name}")
                    process.kill()

                logger.info(f"✅ {service_name} stopped")
        except Exception as e:
            # Best-effort shutdown: log and continue with the next service.
            logger.error(f"Error stopping {service_name}: {e}")
193
+
194
+ # Service Communication Functions
195
async def _call_service(base_url: str, label: str, endpoint: str,
                        method: str, **kwargs) -> Dict[str, Any]:
    """Proxy one HTTP request to a backing service.

    Shared implementation for the three ``call_*_service`` wrappers, which
    were previously triplicated verbatim.

    Args:
        base_url: Service root URL (e.g. ``config.NER_SERVICE_URL``).
        label: Human-readable service name used in error messages.
        endpoint: Path appended to *base_url*.
        method: HTTP verb.
        **kwargs: Forwarded to ``httpx.AsyncClient.request`` (json=, files=, ...).

    Returns:
        The decoded JSON body on HTTP 200.

    Raises:
        HTTPException: with the downstream status/text on non-200 responses,
            or 503 when the service cannot be reached.
    """
    try:
        # 300 s timeout: document analysis on the backing services can be slow.
        async with httpx.AsyncClient(timeout=300.0) as client:
            url = f"{base_url}{endpoint}"
            response = await client.request(method, url, **kwargs)

            if response.status_code == 200:
                return response.json()
            # Propagate the downstream error verbatim so callers see the
            # backing service's status code and message.
            raise HTTPException(status_code=response.status_code, detail=response.text)

    except httpx.RequestError as e:
        raise HTTPException(status_code=503, detail=f"{label} service unavailable: {e}")


async def call_ner_service(endpoint: str, method: str = "GET", **kwargs) -> Dict[str, Any]:
    """Call NER service endpoint"""
    return await _call_service(config.NER_SERVICE_URL, "NER", endpoint, method, **kwargs)


async def call_ocr_service(endpoint: str, method: str = "GET", **kwargs) -> Dict[str, Any]:
    """Call OCR service endpoint"""
    return await _call_service(config.OCR_SERVICE_URL, "OCR", endpoint, method, **kwargs)


async def call_rag_service(endpoint: str, method: str = "GET", **kwargs) -> Dict[str, Any]:
    """Call RAG service endpoint"""
    return await _call_service(config.RAG_SERVICE_URL, "RAG", endpoint, method, **kwargs)
238
+ raise HTTPException(status_code=503, detail=f"RAG service unavailable: {e}")
239
+
240
+ # Application Lifecycle
241
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: start the backing services, yield, then stop them.

    Startup order matters: OCR first (RAG and NER depend on it), then RAG,
    then NER.  Raises RuntimeError — aborting app startup — if the
    environment is invalid or no service could be started.
    """
    logger.info("🚀 Starting Unified AI Services Application")

    # Print configuration summary
    config.print_configuration_summary()

    # Validate environment before spawning anything.
    if not validate_environment():
        logger.error("❌ Environment validation failed. Please check your configuration.")
        raise RuntimeError("Invalid environment configuration")

    # (name, script path, port) for each managed service, in start order.
    service_definitions = [
        ("ocr", "services/ocr_service.py", config.ocr.PORT),
        ("rag", "services/rag_service.py", config.rag.PORT),
        ("ner", "services/ner_service.py", config.ner.PORT)
    ]

    # Start services; a missing script or a failed start is tolerated as
    # long as at least one service comes up.
    started_services = []
    for service_name, script_path, port in service_definitions:
        if os.path.exists(script_path):
            success = await start_service(service_name, script_path, port)
            if success:
                started_services.append(service_name)
            else:
                logger.error(f"Failed to start {service_name} service")
        else:
            logger.warning(f"Service script not found: {script_path}")

    if len(started_services) == 0:
        logger.error("❌ No services could be started")
        raise RuntimeError("Failed to start any services")

    logger.info(f"✅ Started {len(started_services)} services: {', '.join(started_services)}")

    # Yield control to the application; everything after runs at shutdown.
    yield

    # Cleanup: terminate all child processes we spawned.
    await stop_all_services()
    logger.info("🏁 Unified AI Services Application shutdown complete")
285
+
286
+ # FastAPI Application
287
app = FastAPI(
    title="Unified AI Services",
    description="Coordinated NER, OCR, and RAG services with combined workflows",
    version="1.0.0",
    lifespan=lifespan
)

# CORS configuration: ALLOWED_ORIGINS is either "*" (allow everything) or a
# JSON list of origins, e.g. '["http://localhost:3000"]'.
allowed_origins = config.ner.ALLOWED_ORIGINS
if allowed_origins == "*":
    # CORSMiddleware expects a sequence of origin strings; the original
    # passed the bare string "*", which only worked by accident.
    allowed_origins = ["*"]
else:
    try:
        allowed_origins = json.loads(allowed_origins)
    except (json.JSONDecodeError, TypeError):
        # Malformed origin list: fall back to allow-all rather than
        # rejecting every request.  NOTE(review): in production a hard
        # failure might be preferable — confirm desired behavior.
        allowed_origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
309
+
310
+ # Main API Endpoints
311
+ @app.get("/")
312
+ async def root():
313
+ return {
314
+ "message": "Unified AI Services",
315
+ "version": "1.0.0",
316
+ "services": {
317
+ "ner": f"{config.NER_SERVICE_URL}",
318
+ "ocr": f"{config.OCR_SERVICE_URL}",
319
+ "rag": f"{config.RAG_SERVICE_URL}"
320
+ },
321
+ "unified_endpoints": {
322
+ "status": "/status",
323
+ "analyze": "/analyze",
324
+ "search": "/search",
325
+ "combined": "/combined/*"
326
+ }
327
+ }
328
+
329
+ @app.get("/health")
330
+ async def unified_health():
331
+ """Unified health check for all services"""
332
+ services = [
333
+ ("ner", config.ner.PORT),
334
+ ("ocr", config.ocr.PORT),
335
+ ("rag", config.rag.PORT)
336
+ ]
337
+
338
+ service_statuses = []
339
+ overall_healthy = True
340
+
341
+ for service_name, port in services:
342
+ status = await get_service_status(service_name, port)
343
+ service_statuses.append(status.dict())
344
+ if not status.health:
345
+ overall_healthy = False
346
+
347
+ return {
348
+ "status": "healthy" if overall_healthy else "degraded",
349
+ "services": service_statuses,
350
+ "timestamp": datetime.utcnow().isoformat(),
351
+ "configuration": {
352
+ "ner_url": config.NER_SERVICE_URL,
353
+ "ocr_url": config.OCR_SERVICE_URL,
354
+ "rag_url": config.RAG_SERVICE_URL
355
+ }
356
+ }
357
+
358
+ @app.get("/status")
359
+ async def detailed_status():
360
+ """Detailed status of all services"""
361
+ services = [
362
+ ("ner", config.ner.PORT),
363
+ ("ocr", config.ocr.PORT),
364
+ ("rag", config.rag.PORT)
365
+ ]
366
+
367
+ detailed_statuses = {}
368
+
369
+ for service_name, port in services:
370
+ try:
371
+ # Get service-specific health data
372
+ async with httpx.AsyncClient() as client:
373
+ response = await client.get(f"http://localhost:{port}/health", timeout=10.0)
374
+ if response.status_code == 200:
375
+ detailed_statuses[service_name] = response.json()
376
+ else:
377
+ detailed_statuses[service_name] = {"status": "error", "error": f"HTTP {response.status_code}"}
378
+ except Exception as e:
379
+ detailed_statuses[service_name] = {"status": "unreachable", "error": str(e)}
380
+
381
+ return {
382
+ "unified_app": {
383
+ "status": "running",
384
+ "port": config.MAIN_PORT,
385
+ "uptime": time.time() - start_time if 'start_time' in globals() else 0
386
+ },
387
+ "services": detailed_statuses,
388
+ "configuration_valid": validate_environment()
389
+ }
390
+
391
+ # Unified Analysis Endpoints
392
+ @app.post("/analyze/unified")
393
+ async def unified_analysis(request: UnifiedAnalysisRequest):
394
+ """Unified analysis combining NER and optional RAG indexing"""
395
+ start_time = time.time()
396
+ service_calls = []
397
+
398
+ try:
399
+ # Step 1: NER Analysis
400
+ ner_data = {
401
+ "text": request.text,
402
+ "url": str(request.url) if request.url else None,
403
+ "extract_relationships": request.extract_relationships,
404
+ "include_embeddings": request.include_embeddings,
405
+ "include_summary": request.include_summary,
406
+ "generate_graph_files": request.generate_graph_files,
407
+ "export_formats": request.export_formats
408
+ }
409
+
410
+ # Remove None values
411
+ ner_data = {k: v for k, v in ner_data.items() if v is not None}
412
+
413
+ if request.text:
414
+ ner_result = await call_ner_service("/analyze/text", "POST", json=ner_data)
415
+ service_calls.append("ner_text")
416
+ elif request.url:
417
+ ner_result = await call_ner_service("/analyze/url", "POST", json=ner_data)
418
+ service_calls.append("ner_url")
419
+ else:
420
+ raise HTTPException(status_code=400, detail="Either text or url must be provided")
421
+
422
+ # Step 2: Optional RAG indexing
423
+ rag_result = None
424
+ if request.enable_rag_indexing and ner_result.get("success"):
425
+ try:
426
+ rag_data = {
427
+ "title": request.rag_title or f"NER Analysis {ner_result.get('analysis_id', 'unknown')}",
428
+ "keywords": request.rag_keywords or ner_result.get("keywords", []),
429
+ "metadata": {
430
+ **(request.rag_metadata or {}),
431
+ "ner_analysis_id": ner_result.get("analysis_id"),
432
+ "entity_count": len(ner_result.get("entities", [])),
433
+ "relationship_count": len(ner_result.get("relationships", []))
434
+ }
435
+ }
436
+
437
+ if request.text:
438
+ # Create temporary file for RAG service
439
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
440
+ f.write(request.text)
441
+ temp_path = f.name
442
+
443
+ try:
444
+ with open(temp_path, 'rb') as f:
445
+ files = {"file": ("ner_analysis.txt", f, "text/plain")}
446
+ form_data = {
447
+ "title": rag_data["title"],
448
+ "keywords": json.dumps(rag_data["keywords"]),
449
+ "metadata": json.dumps(rag_data["metadata"])
450
+ }
451
+
452
+ async with httpx.AsyncClient(timeout=300.0) as client:
453
+ response = await client.post(
454
+ f"{config.RAG_SERVICE_URL}/documents/upload",
455
+ files=files,
456
+ data=form_data
457
+ )
458
+ if response.status_code == 200:
459
+ rag_result = response.json()
460
+ service_calls.append("rag_upload")
461
+ finally:
462
+ os.unlink(temp_path)
463
+
464
+ elif request.url:
465
+ async with httpx.AsyncClient(timeout=300.0) as client:
466
+ response = await client.post(
467
+ f"{config.RAG_SERVICE_URL}/documents/url",
468
+ json={
469
+ "url": str(request.url),
470
+ **rag_data,
471
+ "extract_images": True
472
+ }
473
+ )
474
+ if response.status_code == 200:
475
+ rag_result = response.json()
476
+ service_calls.append("rag_url")
477
+
478
+ except Exception as e:
479
+ logger.warning(f"RAG indexing failed: {e}")
480
+ # Continue without RAG result
481
+
482
+ processing_time = time.time() - start_time
483
+
484
+ return UnifiedResponse(
485
+ success=True,
486
+ service_calls=service_calls,
487
+ ner_analysis=ner_result,
488
+ rag_document=rag_result,
489
+ processing_time=processing_time
490
+ )
491
+
492
+ except Exception as e:
493
+ processing_time = time.time() - start_time
494
+ logger.error(f"Unified analysis failed: {e}")
495
+
496
+ return UnifiedResponse(
497
+ success=False,
498
+ service_calls=service_calls,
499
+ processing_time=processing_time,
500
+ error=str(e)
501
+ )
502
+
503
+ @app.post("/search/combined")
504
+ async def combined_search(request: CombinedSearchRequest):
505
+ """Combined search using RAG with optional NER analysis of results"""
506
+ start_time = time.time()
507
+ service_calls = []
508
+
509
+ try:
510
+ # Step 1: RAG Search
511
+ search_data = {
512
+ "query": request.query,
513
+ "limit": request.limit,
514
+ "similarity_threshold": request.similarity_threshold
515
+ }
516
+
517
+ search_result = await call_rag_service("/search", "POST", json=search_data)
518
+ service_calls.append("rag_search")
519
+
520
+ # Step 2: Optional NER analysis of search results
521
+ ner_results = []
522
+ if request.include_ner_analysis and search_result.get("results"):
523
+ for i, result in enumerate(search_result["results"][:3]): # Analyze top 3 results
524
+ chunk_content = result.get("chunk", {}).get("content", "")
525
+ if chunk_content:
526
+ try:
527
+ ner_data = {
528
+ "text": chunk_content,
529
+ "extract_relationships": True,
530
+ "include_embeddings": False,
531
+ "include_summary": False,
532
+ "generate_graph_files": False,
533
+ "export_formats": request.ner_export_formats
534
+ }
535
+
536
+ ner_result = await call_ner_service("/analyze/text", "POST", json=ner_data)
537
+ ner_results.append({
538
+ "result_index": i,
539
+ "ner_analysis": ner_result
540
+ })
541
+ service_calls.append(f"ner_text_{i}")
542
+
543
+ except Exception as e:
544
+ logger.warning(f"NER analysis failed for result {i}: {e}")
545
+
546
+ processing_time = time.time() - start_time
547
+
548
+ return UnifiedResponse(
549
+ success=True,
550
+ service_calls=service_calls,
551
+ search_results={
552
+ **search_result,
553
+ "ner_analyses": ner_results
554
+ },
555
+ processing_time=processing_time
556
+ )
557
+
558
+ except Exception as e:
559
+ processing_time = time.time() - start_time
560
+ logger.error(f"Combined search failed: {e}")
561
+
562
+ return UnifiedResponse(
563
+ success=False,
564
+ service_calls=service_calls,
565
+ processing_time=processing_time,
566
+ error=str(e)
567
+ )
568
+
569
+ # Service Proxy Endpoints
570
+ @app.api_route("/ner/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
571
+ async def ner_proxy(path: str, request):
572
+ """Proxy requests to NER service"""
573
+ try:
574
+ async with httpx.AsyncClient(timeout=300.0) as client:
575
+ url = f"{config.NER_SERVICE_URL}/{path}"
576
+
577
+ # Forward the request
578
+ if request.method == "GET":
579
+ response = await client.get(url, params=request.query_params)
580
+ else:
581
+ # Handle different content types
582
+ content_type = request.headers.get("content-type", "")
583
+
584
+ if "multipart/form-data" in content_type:
585
+ # Handle file uploads
586
+ form = await request.form()
587
+ files = {}
588
+ data = {}
589
+
590
+ for key, value in form.items():
591
+ if hasattr(value, 'read'): # File-like object
592
+ files[key] = (value.filename, await value.read(), value.content_type)
593
+ else:
594
+ data[key] = value
595
+
596
+ response = await client.request(request.method, url, files=files, data=data)
597
+ else:
598
+ # Handle JSON/other content
599
+ body = await request.body()
600
+ response = await client.request(
601
+ request.method,
602
+ url,
603
+ content=body,
604
+ headers={k: v for k, v in request.headers.items() if k.lower() != "host"}
605
+ )
606
+
607
+ # Return response
608
+ return response.json() if response.headers.get("content-type", "").startswith("application/json") else response.text
609
+
610
+ except httpx.RequestError as e:
611
+ raise HTTPException(status_code=503, detail=f"NER service unavailable: {e}")
612
+
613
+ @app.api_route("/ocr/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
614
+ async def ocr_proxy(path: str, request):
615
+ """Proxy requests to OCR service"""
616
+ try:
617
+ async with httpx.AsyncClient(timeout=300.0) as client:
618
+ url = f"{config.OCR_SERVICE_URL}/{path}"
619
+
620
+ # Forward the request
621
+ if request.method == "GET":
622
+ response = await client.get(url, params=request.query_params)
623
+ else:
624
+ # Handle different content types
625
+ content_type = request.headers.get("content-type", "")
626
+
627
+ if "multipart/form-data" in content_type:
628
+ # Handle file uploads
629
+ form = await request.form()
630
+ files = {}
631
+ data = {}
632
+
633
+ for key, value in form.items():
634
+ if hasattr(value, 'read'): # File-like object
635
+ files[key] = (value.filename, await value.read(), value.content_type)
636
+ else:
637
+ data[key] = value
638
+
639
+ response = await client.request(request.method, url, files=files, data=data)
640
+ else:
641
+ # Handle JSON/other content
642
+ body = await request.body()
643
+ response = await client.request(
644
+ request.method,
645
+ url,
646
+ content=body,
647
+ headers={k: v for k, v in request.headers.items() if k.lower() != "host"}
648
+ )
649
+
650
+ # Return response
651
+ return response.json() if response.headers.get("content-type", "").startswith("application/json") else response.text
652
+
653
+ except httpx.RequestError as e:
654
+ raise HTTPException(status_code=503, detail=f"OCR service unavailable: {e}")
655
+
656
+ @app.api_route("/rag/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
657
+ async def rag_proxy(path: str, request):
658
+ """Proxy requests to RAG service"""
659
+ try:
660
+ async with httpx.AsyncClient(timeout=300.0) as client:
661
+ url = f"{config.RAG_SERVICE_URL}/{path}"
662
+
663
+ # Forward the request
664
+ if request.method == "GET":
665
+ response = await client.get(url, params=request.query_params)
666
+ else:
667
+ # Handle different content types
668
+ content_type = request.headers.get("content-type", "")
669
+
670
+ if "multipart/form-data" in content_type:
671
+ # Handle file uploads
672
+ form = await request.form()
673
+ files = {}
674
+ data = {}
675
+
676
+ for key, value in form.items():
677
+ if hasattr(value, 'read'): # File-like object
678
+ files[key] = (value.filename, await value.read(), value.content_type)
679
+ else:
680
+ data[key] = value
681
+
682
+ response = await client.request(request.method, url, files=files, data=data)
683
+ else:
684
+ # Handle JSON/other content
685
+ body = await request.body()
686
+ response = await client.request(
687
+ request.method,
688
+ url,
689
+ content=body,
690
+ headers={k: v for k, v in request.headers.items() if k.lower() != "host"}
691
+ )
692
+
693
+ # Return response
694
+ return response.json() if response.headers.get("content-type", "").startswith("application/json") else response.text
695
+
696
+ except httpx.RequestError as e:
697
+ raise HTTPException(status_code=503, detail=f"RAG service unavailable: {e}")
698
+
699
+ # Convenience endpoints (direct service access)
700
+ @app.get("/analyze/text")
701
+ @app.post("/analyze/text")
702
+ async def analyze_text_direct(request=None):
703
+ """Direct access to NER text analysis"""
704
+ if request:
705
+ return await call_ner_service("/analyze/text", "POST", json=await request.json())
706
+ else:
707
+ return {"message": "Use POST method with text data"}
708
+
709
+ @app.get("/documents")
710
+ async def list_documents():
711
+ """Direct access to RAG document listing"""
712
+ return await call_rag_service("/documents", "GET")
713
+
714
+ @app.post("/search")
715
+ async def search_direct(request):
716
+ """Direct access to RAG search"""
717
+ return await call_rag_service("/search", "POST", json=await request.json())
718
+
719
+ # Utility endpoints
720
+ @app.get("/services")
721
+ async def list_services():
722
+ """List all available services and their endpoints"""
723
+ return {
724
+ "services": {
725
+ "ner": {
726
+ "url": config.NER_SERVICE_URL,
727
+ "description": "Named Entity Recognition with relationship extraction",
728
+ "endpoints": [
729
+ "/analyze/text", "/analyze/file", "/analyze/url", "/analyze/multi",
730
+ "/download/{analysis_id}/{file_type}", "/statistics", "/entity-types", "/relationship-types"
731
+ ]
732
+ },
733
+ "ocr": {
734
+ "url": config.OCR_SERVICE_URL,
735
+ "description": "Optical Character Recognition with document processing",
736
+ "endpoints": [
737
+ "/ocr/upload", "/ocr/url", "/ocr/analyze"
738
+ ]
739
+ },
740
+ "rag": {
741
+ "url": config.RAG_SERVICE_URL,
742
+ "description": "Retrieval-Augmented Generation with vector search",
743
+ "endpoints": [
744
+ "/documents/upload", "/documents/url", "/search", "/documents", "/documents/{id}"
745
+ ]
746
+ }
747
+ },
748
+ "unified": {
749
+ "url": f"http://localhost:{config.MAIN_PORT}",
750
+ "description": "Unified interface for combined workflows",
751
+ "endpoints": [
752
+ "/analyze/unified", "/search/combined", "/ner/*", "/ocr/*", "/rag/*"
753
+ ]
754
+ }
755
+ }
756
+
757
+ # Signal handlers for graceful shutdown
758
+ def signal_handler(signum, frame):
759
+ """Handle shutdown signals"""
760
+ logger.info(f"Received signal {signum}, initiating graceful shutdown...")
761
+ asyncio.create_task(stop_all_services())
762
+
763
+ # Register signal handlers
764
+ signal.signal(signal.SIGINT, signal_handler)
765
+ signal.signal(signal.SIGTERM, signal_handler)
766
+
767
+ # Store start time for uptime calculation
768
+ start_time = time.time()
769
+
770
+ if __name__ == "__main__":
771
+ print("πŸš€ Starting Unified AI Services Application")
772
+ print("=" * 50)
773
+
774
+ # Validate configuration before starting
775
+ if not validate_environment():
776
+ print("οΏ½οΏ½οΏ½ Configuration validation failed!")
777
+ print("Please check your .env file and ensure all required services are configured.")
778
+ sys.exit(1)
779
+
780
+ print(f"🌐 Main application will run on: http://{config.MAIN_HOST}:{config.MAIN_PORT}")
781
+ print(f"πŸ“Š Services will be started automatically:")
782
+ print(f" β€’ NER Service: http://localhost:{config.ner.PORT}")
783
+ print(f" β€’ OCR Service: http://localhost:{config.ocr.PORT}")
784
+ print(f" β€’ RAG Service: http://localhost:{config.rag.PORT}")
785
+ print("")
786
+ print("🎯 Available endpoints:")
787
+ print(" β€’ Main API: /")
788
+ print(" β€’ Health Check: /health")
789
+ print(" β€’ Unified Analysis: /analyze/unified")
790
+ print(" β€’ Combined Search: /search/combined")
791
+ print(" β€’ Service Proxies: /ner/*, /ocr/*, /rag/*")
792
+ print("")
793
+ print("πŸ“– API Documentation: /docs")
794
+ print("")
795
+
796
+ try:
797
+ uvicorn.run(
798
+ "app:app",
799
+ host=config.MAIN_HOST,
800
+ port=config.MAIN_PORT,
801
+ reload=config.ner.DEBUG,
802
+ log_level="info"
803
+ )
804
+ except KeyboardInterrupt:
805
+ print("\nπŸ›‘ Shutting down gracefully...")
806
+ finally:
807
+ # Cleanup will be handled by the lifespan context manager
808
+ pass
configs.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Centralized Configuration Management for Unified AI Services
4
+ Manages configuration for NER, OCR, and RAG services
5
+ """
6
+
7
+ import os
8
+ import logging
9
+ from pathlib import Path
10
+ from typing import Optional, Dict, Any, List
11
+ from dotenv import load_dotenv
12
+
13
+ # Load environment variables
14
+ env_path = Path(__file__).parent / '.env'
15
+ if env_path.exists():
16
+ load_dotenv(dotenv_path=env_path)
17
+ else:
18
+ load_dotenv() # Load from default location
19
+
20
+ # Setup logging
21
+ logging.basicConfig(
22
+ level=getattr(logging, os.getenv("LOG_LEVEL", "INFO").upper()),
23
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
24
+ )
25
+ logger = logging.getLogger(__name__)
26
+
27
+ class BaseConfig:
28
+ """Base configuration class with common settings"""
29
+
30
+ def __init__(self):
31
+ # Server Configuration
32
+ self.HOST = os.getenv("HOST", "0.0.0.0")
33
+ self.DEBUG = os.getenv("DEBUG", "False").lower() == "true"
34
+
35
+ # Database Configuration (shared by NER and RAG)
36
+ self.POSTGRES_HOST = os.getenv("POSTGRES_HOST", "")
37
+ self.POSTGRES_PORT = int(os.getenv("POSTGRES_PORT", "5432"))
38
+ self.POSTGRES_USER = os.getenv("POSTGRES_USER", "")
39
+ self.POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "")
40
+ self.POSTGRES_DATABASE = os.getenv("POSTGRES_DATABASE", "postgres")
41
+
42
+ # Azure OpenAI Configuration (shared by NER and RAG)
43
+ self.AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "")
44
+ self.AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY", "")
45
+ self.EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-3-large")
46
+ self.AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-3-large")
47
+
48
+ # Azure Storage Configuration (shared by NER and RAG)
49
+ self.AZURE_STORAGE_ACCOUNT_URL = os.getenv("AZURE_STORAGE_ACCOUNT_URL", "")
50
+ self.AZURE_BLOB_SAS_TOKEN = os.getenv("AZURE_BLOB_SAS_TOKEN", "")
51
+ self.BLOB_CONTAINER = os.getenv("BLOB_CONTAINER", "historylog")
52
+
53
+ # Processing Configuration
54
+ self.MAX_FILE_SIZE = int(os.getenv("MAX_FILE_SIZE", "50")) * 1024 * 1024 # Convert MB to bytes
55
+ self.REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "300"))
56
+
57
+ # CORS Configuration
58
+ self.ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "*")
59
+
60
+ def validate_azure_openai(self) -> bool:
61
+ """Validate Azure OpenAI configuration"""
62
+ return bool(
63
+ self.AZURE_OPENAI_ENDPOINT and
64
+ self.AZURE_OPENAI_API_KEY and
65
+ self.AZURE_OPENAI_ENDPOINT != "YOUR_AZURE_OPENAI_ENDPOINT" and
66
+ self.AZURE_OPENAI_API_KEY != "YOUR_AZURE_OPENAI_KEY"
67
+ )
68
+
69
+ def validate_postgres(self) -> bool:
70
+ """Validate PostgreSQL configuration"""
71
+ return bool(
72
+ self.POSTGRES_HOST and
73
+ self.POSTGRES_USER and
74
+ self.POSTGRES_PASSWORD and
75
+ self.POSTGRES_DATABASE
76
+ )
77
+
78
+ def validate_azure_storage(self) -> bool:
79
+ """Validate Azure Storage configuration"""
80
+ return bool(
81
+ self.AZURE_STORAGE_ACCOUNT_URL and
82
+ self.AZURE_BLOB_SAS_TOKEN
83
+ )
84
+
85
+ class NERConfig(BaseConfig):
86
+ """Configuration for NER Service"""
87
+
88
+ def __init__(self):
89
+ super().__init__()
90
+ self.PORT = int(os.getenv("NER_PORT", "8500"))
91
+
92
+ # DeepSeek Configuration
93
+ self.DEEPSEEK_ENDPOINT = os.getenv("DEEPSEEK_ENDPOINT", "")
94
+ self.DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
95
+ self.DEEPSEEK_MODEL = os.getenv("DEEPSEEK_MODEL", "DeepSeek-R1-0528")
96
+
97
+ # OCR Service Configuration
98
+ self.OCR_SERVICE_URL = os.getenv("OCR_SERVICE_URL", "http://localhost:8400")
99
+
100
+ # NER Specific Settings
101
+ self.MAX_TEXT_LENGTH = 100000 # 100KB
102
+ self.SUPPORTED_TEXT_FORMATS = {'.txt', '.doc', '.docx', '.rtf'}
103
+ self.SUPPORTED_OCR_FORMATS = {'.pdf', '.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif'}
104
+
105
+ # Entity and Relationship Types
106
+ self.ENTITY_TYPES = [
107
+ "PERSON", "ORGANIZATION", "LOCATION", "DATE", "TIME", "MONEY", "PRODUCT", "EVENT",
108
+ "VEHICLE", "SUSPICIOUS_OBJECT", "ILLEGAL_ACTIVITY", "EVIDENCE", "ILLEGAL_ITEM",
109
+ "WEAPON", "DRUG", "CHEMICAL", "DOCUMENT", "PHONE_NUMBER", "ADDRESS", "EMAIL"
110
+ ]
111
+
112
+ self.RELATIONSHIP_TYPES = [
113
+ # Standard relationships
114
+ "works_for", "founded", "located_in", "part_of", "associated_with", "owns", "manages",
115
+ "leads", "reports_to", "collaborates_with", "partners_with", "supplies_to", "acquires",
116
+ "invests_in", "headquartered_in", "operates_in", "born_in", "lives_in", "studied_at",
117
+ "graduated_from", "worked_at", "visited", "attended", "participated_in", "sponsored",
118
+ "developed", "created", "invented", "discovered", "published", "authored", "edited",
119
+ # Thai relationships
120
+ "ΰΈ—ΰΈ³ΰΈ‡ΰΈ²ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ", "ΰΈΰΉˆΰΈ­ΰΈ•ΰΈ±ΰΉ‰ΰΈ‡", "ΰΈ•ΰΈ±ΰΉ‰ΰΈ‡ΰΈ­ΰΈ’ΰΈΉΰΉˆΰΈ—ΰΈ΅ΰΉˆ", "ΰΉ€ΰΈ›ΰΉ‡ΰΈ™ΰΈͺΰΉˆΰΈ§ΰΈ™ΰΈ«ΰΈ™ΰΈΆΰΉˆΰΈ‡ΰΈ‚ΰΈ­ΰΈ‡", "ΰΉ€ΰΈΰΈ΅ΰΉˆΰΈ’ΰΈ§ΰΈ‚ΰΉ‰ΰΈ­ΰΈ‡ΰΈΰΈ±ΰΈš", "ΰΉ€ΰΈ›ΰΉ‡ΰΈ™ΰΉ€ΰΈˆΰΉ‰ΰΈ²ΰΈ‚ΰΈ­ΰΈ‡", "ΰΈˆΰΈ±ΰΈ”ΰΈΰΈ²ΰΈ£",
121
+ "ΰΈ™ΰΈ³ΰΉ‚ΰΈ”ΰΈ’", "ΰΈ£ΰΈ²ΰΈ’ΰΈ‡ΰΈ²ΰΈ™ΰΈ•ΰΉˆΰΈ­", "ΰΈ£ΰΉˆΰΈ§ΰΈ‘ΰΈ‡ΰΈ²ΰΈ™ΰΈΰΈ±ΰΈš", "ΰΉ€ΰΈ›ΰΉ‡ΰΈ™ΰΈžΰΈ±ΰΈ™ΰΈ˜ΰΈ‘ΰΈ΄ΰΈ•ΰΈ£ΰΈΰΈ±ΰΈš", "ΰΈˆΰΈ±ΰΈ”ΰΈ«ΰΈ²ΰΉƒΰΈ«ΰΉ‰", "ΰΈ‹ΰΈ·ΰΉ‰ΰΈ­ΰΈΰΈ΄ΰΈˆΰΈΰΈ²ΰΈ£", "ΰΈ₯ΰΈ‡ΰΈ—ΰΈΈΰΈ™ΰΉƒΰΈ™",
122
+ "ΰΈͺΰΈ³ΰΈ™ΰΈ±ΰΈΰΈ‡ΰΈ²ΰΈ™ΰΉƒΰΈ«ΰΈΰΉˆΰΈ—ΰΈ΅ΰΉˆ", "ดำเนินการใน", "ΰΉ€ΰΈΰΈ΄ΰΈ”ΰΈ—ΰΈ΅ΰΉˆ", "ΰΈ­ΰΈ²ΰΈ¨ΰΈ±ΰΈ’ΰΈ­ΰΈ’ΰΈΉΰΉˆΰΈ—ΰΈ΅ΰΉˆ", "ΰΈ¨ΰΈΆΰΈΰΈ©ΰΈ²ΰΈ—ΰΈ΅ΰΉˆ", "จบการศยกษาจาก",
123
+ # Law enforcement relationships
124
+ "arrested_by", "investigated_by", "confiscated_from", "used_in", "evidence_of", "witness_of",
125
+ "victim_of", "suspect_in", "charged_with", "convicted_of", "sentenced_by", "defended_by",
126
+ "prosecuted_by", "testified_against", "alibi_for", "found_at", "seized_from", "linked_to",
127
+ "ΰΈˆΰΈ±ΰΈšΰΈΰΈΈΰΈ‘ΰΉ‚ΰΈ”ΰΈ’", "ΰΈͺอบΰΈͺΰΈ§ΰΈ™ΰΉ‚ΰΈ”ΰΈ’", "ΰΈ’ΰΈΆΰΈ”ΰΈˆΰΈ²ΰΈ", "ΰΉƒΰΈŠΰΉ‰ΰΉƒΰΈ™ΰΈΰΈ²ΰΈ£", "ΰΈ«ΰΈ₯ักฐานของ", "ΰΈžΰΈ’ΰΈ²ΰΈ™ΰΉƒΰΈ™", "ΰΉ€ΰΈ«ΰΈ’ΰΈ·ΰΉˆΰΈ­ΰΈ‚ΰΈ­ΰΈ‡",
128
+ "ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈͺΰΈ‡ΰΈͺΰΈ±ΰΈ’ΰΉƒΰΈ™", "ถูกตั้งข้อหา", "ถูกตัดΰΈͺΰΈ΄ΰΈ™", "ΰΈ–ΰΈΉΰΈΰΈžΰΈ΄ΰΈžΰΈ²ΰΈΰΈ©ΰΈ²ΰΉ‚ΰΈ”ΰΈ’", "ΰΈ•ΰΉˆΰΈ­ΰΈͺΰΈΉΰΉ‰ΰΈ„ΰΈ”ΰΈ΅ΰΉ‚ΰΈ”ΰΈ’", "ΰΈŸΰΉ‰ΰΈ­ΰΈ‡ΰΈ£ΰΉ‰ΰΈ­ΰΈ‡ΰΉ‚ΰΈ”ΰΈ’",
129
+ "ΰΉƒΰΈ«ΰΉ‰ΰΈΰΈ²ΰΈ£ΰΈ•ΰΉˆΰΈ­ΰΈ•ΰΉ‰ΰΈ²ΰΈ™", "เป็นข้อแก้ตัวΰΈͺำหรับ", "ΰΈžΰΈšΰΈ—ΰΈ΅ΰΉˆ", "ΰΈ’ΰΈΆΰΈ”ΰΈˆΰΈ²ΰΈ", "ΰΉ€ΰΈŠΰΈ·ΰΉˆΰΈ­ΰΈ‘ΰΉ‚ΰΈ’ΰΈ‡ΰΈΰΈ±ΰΈš",
130
+ # Criminal relationships
131
+ "possess_illegal", "transport_illegal", "sell_illegal", "buy_illegal", "hide_evidence",
132
+ "plan_crime", "commit_crime", "flee_from", "escape_from", "hide_at", "meet_with",
133
+ "communicate_with", "threaten", "blackmail", "bribe", "corrupt", "money_launder",
134
+ "ΰΈ„ΰΈ£ΰΈ­ΰΈšΰΈ„ΰΈ£ΰΈ­ΰΈ‡ΰΈ‚ΰΈ­ΰΈ‡ΰΈœΰΈ΄ΰΈ”ΰΈΰΈŽΰΈ«ΰΈ‘ΰΈ²ΰΈ’", "ΰΈ‚ΰΈ™ΰΈͺΰΉˆΰΈ‡ΰΈ‚ΰΈ­ΰΈ‡ΰΈœΰΈ΄ΰΈ”ΰΈΰΈŽΰΈ«ΰΈ‘ΰΈ²ΰΈ’", "ΰΈ‚ΰΈ²ΰΈ’ΰΈ‚ΰΈ­ΰΈ‡ΰΈœΰΈ΄ΰΈ”ΰΈΰΈŽΰΈ«ΰΈ‘ΰΈ²ΰΈ’", "ΰΈ‹ΰΈ·ΰΉ‰ΰΈ­ΰΈ‚ΰΈ­ΰΈ‡ΰΈœΰΈ΄ΰΈ”ΰΈΰΈŽΰΈ«ΰΈ‘ΰΈ²ΰΈ’",
135
+ "ΰΈ‹ΰΉˆΰΈ­ΰΈ™ΰΈ«ΰΈ₯ักฐาน", "ΰΈ§ΰΈ²ΰΈ‡ΰΉΰΈœΰΈ™ΰΈ­ΰΈ²ΰΈŠΰΈΰΈ²ΰΈΰΈ£ΰΈ£ΰΈ‘", "ΰΈΰΈ£ΰΈ°ΰΈ—ΰΈ³ΰΈ­ΰΈ²ΰΈŠΰΈΰΈ²ΰΈΰΈ£ΰΈ£ΰΈ‘", "ΰΈ«ΰΈ₯ΰΈšΰΈ«ΰΈ™ΰΈ΅ΰΈˆΰΈ²ΰΈ", "ΰΉΰΈ­ΰΈšΰΈ‹ΰΉˆΰΈ­ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ",
136
+ "ΰΈžΰΈšΰΈ›ΰΈ°ΰΈΰΈ±ΰΈš", "ΰΈ•ΰΈ΄ΰΈ”ΰΈ•ΰΉˆΰΈ­ΰΈΰΈ±ΰΈš", "ΰΈ‚ΰΉˆΰΈ‘ΰΈ‚ΰΈΉΰΉˆ", "แบΰΈ₯ΰΉ‡ΰΈ„ΰΉ€ΰΈ‘ΰΈ₯์", "ΰΉƒΰΈ«ΰΉ‰ΰΈͺΰΈ΄ΰΈ™ΰΈšΰΈ™", "ΰΈ—ΰΈΈΰΈˆΰΈ£ΰΈ΄ΰΈ•", "ΰΈŸΰΈ­ΰΈΰΉ€ΰΈ‡ΰΈ΄ΰΈ™"
137
+ ]
138
+
139
+ def validate_deepseek(self) -> bool:
140
+ """Validate DeepSeek configuration"""
141
+ return bool(
142
+ self.DEEPSEEK_ENDPOINT and
143
+ self.DEEPSEEK_API_KEY and
144
+ self.DEEPSEEK_ENDPOINT != "YOUR_DEEPSEEK_ENDPOINT" and
145
+ self.DEEPSEEK_API_KEY != "YOUR_DEEPSEEK_API_KEY"
146
+ )
147
+
148
+ class OCRConfig(BaseConfig):
149
+ """Configuration for OCR Service"""
150
+
151
+ def __init__(self):
152
+ super().__init__()
153
+ self.PORT = int(os.getenv("OCR_PORT", "8400"))
154
+
155
+ # Azure Document Intelligence Configuration
156
+ self.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT", "")
157
+ self.AZURE_DOCUMENT_INTELLIGENCE_KEY = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_KEY", "")
158
+
159
+ # Web scraping configuration
160
+ self.MAX_IMAGES_PER_PAGE = int(os.getenv("MAX_IMAGES_PER_PAGE", "10"))
161
+ self.USER_AGENT = os.getenv("USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
162
+
163
+ def validate_azure_document_intelligence(self) -> bool:
164
+ """Validate Azure Document Intelligence configuration"""
165
+ return bool(
166
+ self.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT and
167
+ self.AZURE_DOCUMENT_INTELLIGENCE_KEY and
168
+ self.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT != "YOUR_FORM_RECOGNIZER_ENDPOINT" and
169
+ self.AZURE_DOCUMENT_INTELLIGENCE_KEY != "YOUR_FORM_RECOGNIZER_KEY"
170
+ )
171
+
172
+ class RAGConfig(BaseConfig):
173
+ """Configuration for RAG Service"""
174
+
175
+ def __init__(self):
176
+ super().__init__()
177
+ self.PORT = int(os.getenv("RAG_PORT", "8401"))
178
+
179
+ # OCR Service Configuration
180
+ self.OCR_SERVICE_URL = os.getenv("OCR_SERVICE_URL", "http://localhost:8400")
181
+
182
+ # PostgreSQL Configuration (specific to RAG)
183
+ self.PG_HOST = self.POSTGRES_HOST
184
+ self.PG_PORT = self.POSTGRES_PORT
185
+ self.PG_DATABASE = os.getenv("PG_DATABASE", "vectorsearch") # RAG uses different default DB
186
+ self.PG_USER = self.POSTGRES_USER
187
+ self.PG_PASSWORD = self.POSTGRES_PASSWORD
188
+ self.PG_SSL_MODE = os.getenv("PG_SSL_MODE", "require")
189
+
190
+ # Chunking Configuration
191
+ self.CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "1000"))
192
+ self.CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", "200"))
193
+ self.MIN_CHUNK_SIZE = int(os.getenv("MIN_CHUNK_SIZE", "50"))
194
+
195
+ # Azure OpenAI Configuration (RAG specific)
196
+ self.AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT", "text-embedding-3-small")
197
+ self.AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")
198
+
199
+ class UnifiedConfig:
200
+ """Unified configuration for all services"""
201
+
202
+ def __init__(self):
203
+ self.ner = NERConfig()
204
+ self.ocr = OCRConfig()
205
+ self.rag = RAGConfig()
206
+
207
+ # Main app configuration
208
+ self.MAIN_PORT = int(os.getenv("MAIN_PORT", "8000"))
209
+ self.MAIN_HOST = os.getenv("MAIN_HOST", "0.0.0.0")
210
+
211
+ # Service URLs (for inter-service communication)
212
+ self.NER_SERVICE_URL = f"http://localhost:{self.ner.PORT}"
213
+ self.OCR_SERVICE_URL = f"http://localhost:{self.ocr.PORT}"
214
+ self.RAG_SERVICE_URL = f"http://localhost:{self.rag.PORT}"
215
+
216
+ # Service Health Check Configuration
217
+ self.HEALTH_CHECK_TIMEOUT = 30
218
+ self.HEALTH_CHECK_RETRIES = 3
219
+ self.HEALTH_CHECK_INTERVAL = 5
220
+
221
+ # Load balancing and routing
222
+ self.SERVICE_WEIGHTS = {
223
+ "ner": 1.0,
224
+ "ocr": 1.0,
225
+ "rag": 1.0
226
+ }
227
+
228
+ def validate_all(self) -> Dict[str, Dict[str, bool]]:
229
+ """Validate all service configurations"""
230
+ validation_results = {
231
+ "ner": {
232
+ "deepseek": self.ner.validate_deepseek(),
233
+ "azure_openai": self.ner.validate_azure_openai(),
234
+ "postgres": self.ner.validate_postgres(),
235
+ "azure_storage": self.ner.validate_azure_storage()
236
+ },
237
+ "ocr": {
238
+ "azure_document_intelligence": self.ocr.validate_azure_document_intelligence()
239
+ },
240
+ "rag": {
241
+ "azure_openai": self.rag.validate_azure_openai(),
242
+ "postgres": self.rag.validate_postgres()
243
+ }
244
+ }
245
+ return validation_results
246
+
247
+ def get_service_config(self, service_name: str) -> BaseConfig:
248
+ """Get configuration for a specific service"""
249
+ service_configs = {
250
+ "ner": self.ner,
251
+ "ocr": self.ocr,
252
+ "rag": self.rag
253
+ }
254
+ return service_configs.get(service_name.lower())
255
+
256
+ def get_database_config(self) -> Dict[str, str]:
257
+ """Get database configuration for services that need it"""
258
+ return {
259
+ "host": self.ner.POSTGRES_HOST,
260
+ "port": str(self.ner.POSTGRES_PORT),
261
+ "user": self.ner.POSTGRES_USER,
262
+ "password": self.ner.POSTGRES_PASSWORD,
263
+ "database": self.ner.POSTGRES_DATABASE,
264
+ "ssl_mode": getattr(self.rag, 'PG_SSL_MODE', 'require')
265
+ }
266
+
267
+ def get_azure_openai_config(self) -> Dict[str, str]:
268
+ """Get Azure OpenAI configuration for services that need it"""
269
+ return {
270
+ "endpoint": self.ner.AZURE_OPENAI_ENDPOINT,
271
+ "api_key": self.ner.AZURE_OPENAI_API_KEY,
272
+ "embedding_model": self.ner.EMBEDDING_MODEL,
273
+ "deployment_name": self.ner.AZURE_OPENAI_DEPLOYMENT_NAME
274
+ }
275
+
276
+ def print_configuration_summary(self):
277
+ """Print a summary of all configurations"""
278
+ print("πŸ”§ Configuration Summary")
279
+ print("=" * 50)
280
+
281
+ # Validate all configurations
282
+ validation_results = self.validate_all()
283
+
284
+ # NER Service
285
+ print(f"πŸ“ NER Service (Port {self.ner.PORT}):")
286
+ print(f" DeepSeek: {'βœ…' if validation_results['ner']['deepseek'] else '❌'}")
287
+ print(f" Azure OpenAI: {'βœ…' if validation_results['ner']['azure_openai'] else '❌'}")
288
+ print(f" PostgreSQL: {'βœ…' if validation_results['ner']['postgres'] else '❌'}")
289
+ print(f" Azure Storage: {'βœ…' if validation_results['ner']['azure_storage'] else '❌'}")
290
+ print(f" OCR Service URL: {self.ner.OCR_SERVICE_URL}")
291
+
292
+ # OCR Service
293
+ print(f"\nπŸ” OCR Service (Port {self.ocr.PORT}):")
294
+ print(f" Azure Document Intelligence: {'βœ…' if validation_results['ocr']['azure_document_intelligence'] else '❌'}")
295
+ print(f" Max File Size: {self.ocr.MAX_FILE_SIZE / (1024*1024):.0f} MB")
296
+
297
+ # RAG Service
298
+ print(f"\n🧠 RAG Service (Port {self.rag.PORT}):")
299
+ print(f" Azure OpenAI: {'βœ…' if validation_results['rag']['azure_openai'] else '❌'}")
300
+ print(f" PostgreSQL: {'βœ…' if validation_results['rag']['postgres'] else '❌'}")
301
+ print(f" OCR Service URL: {self.rag.OCR_SERVICE_URL}")
302
+ print(f" Chunk Size: {self.rag.CHUNK_SIZE}")
303
+
304
+ # Main App
305
+ print(f"\n🌐 Main App (Port {self.MAIN_PORT}):")
306
+ print(f" NER Service: {self.NER_SERVICE_URL}")
307
+ print(f" OCR Service: {self.OCR_SERVICE_URL}")
308
+ print(f" RAG Service: {self.RAG_SERVICE_URL}")
309
+
310
+ # Database Configuration
311
+ print(f"\nπŸ—„οΈ Database Configuration:")
312
+ print(f" Host: {self.ner.POSTGRES_HOST}")
313
+ print(f" Port: {self.ner.POSTGRES_PORT}")
314
+ print(f" User: {self.ner.POSTGRES_USER}")
315
+ print(f" NER Database: {self.ner.POSTGRES_DATABASE}")
316
+ print(f" RAG Database: {self.rag.PG_DATABASE}")
317
+
318
+ # Critical Issues
319
+ all_validations = []
320
+ for service, validations in validation_results.items():
321
+ all_validations.extend(validations.values())
322
+
323
+ if not all(all_validations):
324
+ print(f"\n⚠️ CONFIGURATION ISSUES DETECTED:")
325
+ for service, validations in validation_results.items():
326
+ for component, is_valid in validations.items():
327
+ if not is_valid:
328
+ print(f" ❌ {service.upper()}: {component} not configured")
329
+ else:
330
+ print(f"\nβœ… All configurations are valid!")
331
+
332
+ # Global configuration instance
333
+ config = UnifiedConfig()
334
+
335
+ def get_config() -> UnifiedConfig:
336
+ """Get the global configuration instance"""
337
+ return config
338
+
339
+ def validate_environment() -> bool:
340
+ """Validate the entire environment configuration"""
341
+ validation_results = config.validate_all()
342
+
343
+ # Check critical components
344
+ critical_components = [
345
+ validation_results['ner']['azure_openai'],
346
+ validation_results['ner']['postgres'],
347
+ validation_results['ocr']['azure_document_intelligence'],
348
+ validation_results['rag']['azure_openai'],
349
+ validation_results['rag']['postgres']
350
+ ]
351
+
352
+ return all(critical_components)
353
+
354
+ if __name__ == "__main__":
355
+ """Test configuration loading and validation"""
356
+ print("πŸ§ͺ Testing Configuration Loading")
357
+ print("=" * 40)
358
+
359
+ try:
360
+ config.print_configuration_summary()
361
+
362
+ if validate_environment():
363
+ print("\nπŸŽ‰ Environment validation passed!")
364
+ print("All critical services are properly configured.")
365
+ else:
366
+ print("\n❌ Environment validation failed!")
367
+ print("Some critical services are not properly configured.")
368
+ print("Please check your .env file and update missing values.")
369
+
370
+ except Exception as e:
371
+ print(f"\n❌ Configuration loading failed: {e}")
372
+ logger.error(f"Configuration error: {e}")
demo.py ADDED
@@ -0,0 +1,527 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified AI Services - Interactive Demo
4
+ Demonstrates the capabilities of the unified system with real examples
5
+ """
6
+
7
+ import asyncio
8
+ import httpx
9
+ import json
10
+ import time
11
+ import sys
12
+ from typing import Dict, Any, Optional
13
+
14
# --- Demo configuration -----------------------------------------------
# Target of the demo; may be overridden via the first CLI argument.
UNIFIED_URL = "http://localhost:8000"
# Per-request timeout (seconds) for the shared httpx client.
TIMEOUT = 60

# --- Demo data --------------------------------------------------------
# Three sample documents exercising Thai-only, English-only and
# mixed-language analysis paths.
DEMO_TEXTS = {
    "thai_crime": """
    ΰΈ„ΰΈ”ΰΈ΅ΰΈ­ΰΈ²ΰΈΰΈ²ΰΈ—ΰΈ΅ΰΉˆΰΈͺำคัญ: ΰΈΰΈ²ΰΈ£ΰΈ†ΰΈ²ΰΈ•ΰΈΰΈ£ΰΈ£ΰΈ‘ΰΈ—ΰΈ΅ΰΉˆΰΈΰΈ£ΰΈΈΰΈ‡ΰΉ€ΰΈ—ΰΈžΰΈ‘ΰΈ«ΰΈ²ΰΈ™ΰΈ„ΰΈ£

    ΰΉ€ΰΈ‘ΰΈ·ΰΉˆΰΈ­ΰΈ§ΰΈ±ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ 15 ΰΈ•ΰΈΈΰΈ₯ΰΈ²ΰΈ„ΰΈ‘ 2567 ΰΉ€ΰΈ§ΰΈ₯ΰΈ² 14:30 ΰΈ™.
    ΰΈ™ΰΈ²ΰΈ’ΰΈͺฑชาฒ ΰΉƒΰΈˆΰΈ”ΰΈ΅ ΰΈ­ΰΈ²ΰΈ’ΰΈΈ 45 ΰΈ›ΰΈ΅ ΰΈ­ΰΈ²ΰΈŠΰΈ΅ΰΈžΰΈ™ΰΈ±ΰΈΰΈ˜ΰΈΈΰΈ£ΰΈΰΈ΄ΰΈˆ
    ΰΈ–ΰΈΉΰΈΰΈžΰΈšΰΉ€ΰΈͺΰΈ΅ΰΈ’ΰΈŠΰΈ΅ΰΈ§ΰΈ΄ΰΈ•ΰΈ—ΰΈ΅ΰΉˆΰΈ„ΰΈ­ΰΈ™ΰΉ‚ΰΈ”ΰΈ‘ΰΈ΄ΰΉ€ΰΈ™ΰΈ΅ΰΈ’ΰΈ‘ ΰΉ€ΰΈ”ΰΈ­ΰΈ° ΰΈ£ΰΈ΄ΰΉ€ΰΈ§ΰΈ­ΰΈ£ΰΉŒ ΰΈ‹ΰΈ΄ΰΈ•ΰΈ΅ΰΉ‰ ΰΈŠΰΈ±ΰΉ‰ΰΈ™ 25

    ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈͺΰΈ‡ΰΈͺΰΈ±ΰΈ’: ΰΈ™ΰΈ²ΰΈ‡ΰΈͺΰΈ²ΰΈ§ΰΈ‘ΰΈ“ΰΈ΅ รักเงิน ΰΈ­ΰΈ²ΰΈ’ΰΈΈ 32 ΰΈ›ΰΈ΅
    ΰΉ€ΰΈ›ΰΉ‡ΰΈ™ΰΉ€ΰΈ₯ΰΈ‚ΰΈ²ΰΈ™ΰΈΈΰΈΰΈ²ΰΈ£ΰΈ‚ΰΈ­ΰΈ‡ΰΈœΰΈΉΰΉ‰ΰΉ€ΰΈͺΰΈ΅ΰΈ’ΰΈŠΰΈ΅ΰΈ§ΰΈ΄ΰΈ•

    ΰΈ«ΰΈ₯ักฐาน: พบΰΈͺΰΈ²ΰΈ£ΰΈžΰΈ΄ΰΈ©ΰΉƒΰΈ™ΰΉΰΈΰΉ‰ΰΈ§ΰΈ™ΰΉ‰ΰΈ³
    ΰΉ€ΰΈ‡ΰΈ΄ΰΈ™ΰΈˆΰΈ³ΰΈ™ΰΈ§ΰΈ™ 500,000 ΰΈšΰΈ²ΰΈ— ΰΈ«ΰΈ²ΰΈ’ΰΉ„ΰΈ›ΰΈˆΰΈ²ΰΈΰΈ•ΰΈΉΰΉ‰ΰΉ€ΰΈ‹ΰΈŸ
    กΰΈ₯ΰΉ‰ΰΈ­ΰΈ‡ΰΈ§ΰΈ‡ΰΈˆΰΈ£ΰΈ›ΰΈ΄ΰΈ”ΰΈšΰΈ±ΰΈ™ΰΈ—ΰΈΆΰΈΰΉ€ΰΈ«ΰΈ•ΰΈΈΰΈΰΈ²ΰΈ£ΰΈ“ΰΉŒΰΉ„ΰΈ”ΰΉ‰

    ΰΈ•ΰΈ³ΰΈ£ΰΈ§ΰΈˆΰΈͺΰΈ–ΰΈ²ΰΈ™ΰΈ΅ΰΈ—ΰΈ­ΰΈ‡ΰΈ«ΰΈ₯ΰΉˆΰΈ­ΰΈ—ΰΈ³ΰΈΰΈ²ΰΈ£ΰΈͺืบΰΈͺΰΈ§ΰΈ™
    ΰΈžΰΈšΰΈ§ΰΉˆΰΈ²ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈͺΰΈ‡ΰΈͺΰΈ±ΰΈ’ΰΈ‘ΰΈ΅ΰΈ«ΰΈ™ΰΈ΅ΰΉ‰ΰΈͺΰΈ΄ΰΈ™ΰΈˆΰΈ³ΰΈ™ΰΈ§ΰΈ™ΰΈ‘ΰΈ²ΰΈ
    """,

    "english_business": """
    Corporate Investigation Report - Tech Acquisition

    On October 20, 2024, Microsoft Corporation announced the acquisition
    of AI startup InnovateTech for $2.5 billion USD.

    Key Personnel:
    - CEO Sarah Johnson of InnovateTech
    - VP Acquisitions David Chen at Microsoft
    - Investment banker Lisa Rodriguez from Goldman Sachs

    The deal includes:
    - 150 AI researchers and engineers
    - Proprietary machine learning algorithms
    - Patents portfolio worth $800 million
    - Office locations in San Francisco and Seattle

    The acquisition strengthens Microsoft's position in the AI market
    and provides access to advanced natural language processing technology.
    """,

    "mixed_content": """
    International Business Partnership
    ΰΈšΰΈ£ΰΈ΄ΰΈ©ΰΈ±ΰΈ— ΰΉ„ΰΈ—ΰΈ’ΰΉ€ΰΈ—ΰΈ„ ΰΈˆΰΈ³ΰΈΰΈ±ΰΈ” (ThaiTech Ltd.)

    Partnership Agreement between:
    - ThaiTech Limited (Thailand)
    - Singapore AI Solutions Pte Ltd (Singapore)
    - Tokyo Innovation Corp (Japan)

    ข้อตกΰΈ₯ΰΈ‡ΰΈ„ΰΈ§ΰΈ²ΰΈ‘ΰΈ£ΰΉˆΰΈ§ΰΈ‘ΰΈ‘ΰΈ·ΰΈ­:
    Investment: $10 million USD (approximately 350 million Thai Baht)
    Duration: 5 years (2024-2029)
    Focus: Artificial Intelligence and Machine Learning

    Key Locations:
    - Bangkok, Thailand (Head Office)
    - ΰΈͺΰΈ΄ΰΈ‡ΰΈ„ΰΉ‚ΰΈ›ΰΈ£ΰΉŒ (Singapore Regional Office)
    - Tokyo, Japan (R&D Center)

    Expected Revenue: $50 million USD by 2027
    """,
}
81
+
82
class UnifiedDemo:
    """Interactive demo for the unified AI services"""

    def __init__(self):
        # httpx.AsyncClient, created lazily in __aenter__.
        self.session = None
        # Raw analysis responses keyed by demo title, filled during the run.
        self.demo_results = {}

    async def __aenter__(self):
        self.session = httpx.AsyncClient(timeout=TIMEOUT)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Close the HTTP client if it was ever opened.
        if self.session:
            await self.session.aclose()

    def print_header(self, title: str):
        """Print formatted header"""
        rule = "=" * 70
        print("\n" + rule)
        print(f"  {title}")
        print(rule)

    def print_section(self, title: str):
        """Print section header"""
        print(f"\nπŸ“‹ {title}")
        print("-" * 50)
107
+
108
+ async def check_system_health(self) -> bool:
109
+ """Check if the unified system is healthy"""
110
+ try:
111
+ response = await self.session.get(f"{UNIFIED_URL}/health")
112
+
113
+ if response.status_code == 200:
114
+ data = response.json()
115
+ status = data.get("status")
116
+ services = data.get("services", [])
117
+
118
+ print(f"πŸ₯ System Health: {status}")
119
+
120
+ for service in services:
121
+ health_icon = "βœ…" if service.get("health") else "❌"
122
+ print(f" {health_icon} {service.get('name', 'unknown')}: {service.get('status', 'unknown')}")
123
+
124
+ healthy_services = [s for s in services if s.get("health")]
125
+
126
+ if len(healthy_services) >= 3: # At least 3 services should be healthy
127
+ print("βœ… System is ready for demo!")
128
+ return True
129
+ else:
130
+ print("❌ System is not ready. Please ensure all services are running.")
131
+ return False
132
+ else:
133
+ print(f"❌ Health check failed: HTTP {response.status_code}")
134
+ return False
135
+
136
+ except Exception as e:
137
+ print(f"❌ Cannot connect to unified system: {e}")
138
+ print("\nπŸ’‘ Make sure the unified application is running:")
139
+ print(" python app.py")
140
+ return False
141
+
142
+ async def demo_unified_analysis(self, text: str, title: str) -> Optional[Dict[str, Any]]:
143
+ """Demonstrate unified analysis capabilities"""
144
+ self.print_section(f"Unified Analysis: {title}")
145
+
146
+ try:
147
+ print(f"πŸ“ Analyzing text ({len(text)} characters)...")
148
+ print(f" Text preview: {text[:100]}...")
149
+
150
+ request_data = {
151
+ "text": text,
152
+ "extract_relationships": True,
153
+ "include_embeddings": False,
154
+ "include_summary": True,
155
+ "generate_graph_files": True,
156
+ "export_formats": ["neo4j", "json"],
157
+ "enable_rag_indexing": True,
158
+ "rag_title": f"Demo: {title}",
159
+ "rag_keywords": ["demo", "analysis", "test"],
160
+ "rag_metadata": {"demo": True, "category": title.lower()}
161
+ }
162
+
163
+ start_time = time.time()
164
+ response = await self.session.post(f"{UNIFIED_URL}/analyze/unified", json=request_data)
165
+ processing_time = time.time() - start_time
166
+
167
+ if response.status_code == 200:
168
+ data = response.json()
169
+
170
+ if data.get("success"):
171
+ service_calls = data.get("service_calls", [])
172
+ ner_analysis = data.get("ner_analysis", {})
173
+ rag_document = data.get("rag_document", {})
174
+
175
+ print(f"βœ… Analysis completed in {processing_time:.2f} seconds")
176
+ print(f"πŸ“ž Service calls: {', '.join(service_calls)}")
177
+
178
+ # NER Results
179
+ if ner_analysis:
180
+ entities = ner_analysis.get("entities", [])
181
+ relationships = ner_analysis.get("relationships", [])
182
+ language = ner_analysis.get("language", "unknown")
183
+
184
+ print(f"\n🏷️ NER Analysis Results:")
185
+ print(f" Language detected: {language}")
186
+ print(f" Entities found: {len(entities)}")
187
+ print(f" Relationships found: {len(relationships)}")
188
+
189
+ # Show top entities by type
190
+ entity_types = {}
191
+ for entity in entities:
192
+ entity_type = entity.get("label", "UNKNOWN")
193
+ if entity_type not in entity_types:
194
+ entity_types[entity_type] = []
195
+ entity_types[entity_type].append(entity.get("text", ""))
196
+
197
+ print(f"\n πŸ“Š Entity breakdown:")
198
+ for entity_type, entity_list in sorted(entity_types.items()):
199
+ print(f" {entity_type}: {len(entity_list)} entities")
200
+ # Show a few examples
201
+ examples = entity_list[:3]
202
+ if examples:
203
+ print(f" Examples: {', '.join(examples)}")
204
+
205
+ # Show relationships
206
+ if relationships:
207
+ print(f"\n πŸ”— Relationship examples:")
208
+ for rel in relationships[:3]:
209
+ source = rel.get("source_entity", "Unknown")
210
+ target = rel.get("target_entity", "Unknown")
211
+ rel_type = rel.get("relationship_type", "unknown")
212
+ confidence = rel.get("confidence", 0)
213
+ print(f" {source} β†’ {target} ({rel_type}, {confidence:.2f})")
214
+ else:
215
+ print(f" ⚠️ No relationships found")
216
+
217
+ # RAG Results
218
+ if rag_document:
219
+ print(f"\nπŸ’Ύ RAG Indexing Results:")
220
+ print(f" Document ID: {rag_document.get('document_id', 'N/A')}")
221
+ print(f" Total chunks: {rag_document.get('total_chunks', 0)}")
222
+ print(f" Status: Document indexed for search")
223
+ else:
224
+ print(f"\n⚠️ RAG indexing was not performed")
225
+
226
+ # Store results for later use
227
+ self.demo_results[title] = data
228
+ return data
229
+ else:
230
+ print(f"❌ Analysis failed: {data.get('error', 'Unknown error')}")
231
+ return None
232
+ else:
233
+ print(f"❌ Request failed: HTTP {response.status_code}")
234
+ print(f" Response: {response.text[:200]}")
235
+ return None
236
+
237
+ except Exception as e:
238
+ print(f"❌ Analysis error: {e}")
239
+ return None
240
+
241
+ async def demo_combined_search(self):
242
+ """Demonstrate combined search capabilities"""
243
+ self.print_section("Combined Search with NER Enhancement")
244
+
245
+ search_queries = [
246
+ "murder investigation Thailand",
247
+ "Microsoft acquisition business",
248
+ "artificial intelligence partnership"
249
+ ]
250
+
251
+ for query in search_queries:
252
+ try:
253
+ print(f"\nπŸ” Searching for: '{query}'")
254
+
255
+ request_data = {
256
+ "query": query,
257
+ "limit": 3,
258
+ "similarity_threshold": 0.1,
259
+ "include_ner_analysis": True,
260
+ "ner_export_formats": ["json"]
261
+ }
262
+
263
+ start_time = time.time()
264
+ response = await self.session.post(f"{UNIFIED_URL}/search/combined", json=request_data)
265
+ search_time = time.time() - start_time
266
+
267
+ if response.status_code == 200:
268
+ data = response.json()
269
+
270
+ if data.get("success"):
271
+ search_results = data.get("search_results", {})
272
+ results = search_results.get("results", [])
273
+ ner_analyses = search_results.get("ner_analyses", [])
274
+
275
+ print(f" βœ… Search completed in {search_time:.2f} seconds")
276
+ print(f" πŸ“Š Found {len(results)} results")
277
+
278
+ for i, result in enumerate(results):
279
+ chunk = result.get("chunk", {})
280
+ similarity = result.get("similarity_score", 0)
281
+ doc_info = result.get("document_info", {})
282
+
283
+ print(f"\n πŸ“„ Result {i+1} (similarity: {similarity:.3f}):")
284
+ print(f" Title: {doc_info.get('title', 'Untitled')}")
285
+ print(f" Content: {chunk.get('content', '')[:100]}...")
286
+
287
+ if ner_analyses:
288
+ print(f"\n 🏷️ NER analysis performed on top {len(ner_analyses)} results")
289
+ for ner_data in ner_analyses:
290
+ ner_result = ner_data.get("ner_analysis", {})
291
+ if ner_result.get("success"):
292
+ entities = ner_result.get("entities", [])
293
+ relationships = ner_result.get("relationships", [])
294
+ print(f" Result {ner_data.get('result_index', 0)}: {len(entities)} entities, {len(relationships)} relationships")
295
+
296
+ else:
297
+ print(f" ❌ Search failed: {data.get('error', 'Unknown error')}")
298
+ else:
299
+ print(f" ❌ Search failed: HTTP {response.status_code}")
300
+
301
+ except Exception as e:
302
+ print(f" ❌ Search error: {e}")
303
+
304
+ async def demo_service_proxies(self):
305
+ """Demonstrate service proxy functionality"""
306
+ self.print_section("Service Proxy Demonstration")
307
+
308
+ # Test NER proxy
309
+ try:
310
+ print("πŸ§ͺ Testing NER service proxy...")
311
+
312
+ test_data = {
313
+ "text": "Quick test: Apple Inc. CEO Tim Cook visited Tokyo, Japan.",
314
+ "extract_relationships": True,
315
+ "include_embeddings": False,
316
+ "generate_graph_files": False
317
+ }
318
+
319
+ response = await self.session.post(f"{UNIFIED_URL}/ner/analyze/text", json=test_data)
320
+
321
+ if response.status_code == 200:
322
+ result = response.json()
323
+ if result.get("success"):
324
+ entities = result.get("entities", [])
325
+ print(f" βœ… NER proxy working: found {len(entities)} entities")
326
+ else:
327
+ print(f" ❌ NER proxy failed: {result.get('error', 'Unknown error')}")
328
+ else:
329
+ print(f" ❌ NER proxy failed: HTTP {response.status_code}")
330
+
331
+ except Exception as e:
332
+ print(f" ❌ NER proxy error: {e}")
333
+
334
+ # Test RAG proxy
335
+ try:
336
+ print("πŸ§ͺ Testing RAG service proxy...")
337
+
338
+ response = await self.session.get(f"{UNIFIED_URL}/rag/documents?limit=3")
339
+
340
+ if response.status_code == 200:
341
+ result = response.json()
342
+ documents = result.get("documents", [])
343
+ print(f" βœ… RAG proxy working: found {len(documents)} documents")
344
+ else:
345
+ print(f" ❌ RAG proxy failed: HTTP {response.status_code}")
346
+
347
+ except Exception as e:
348
+ print(f" ❌ RAG proxy error: {e}")
349
+
350
+ # Test OCR proxy
351
+ try:
352
+ print("πŸ§ͺ Testing OCR service proxy...")
353
+
354
+ response = await self.session.get(f"{UNIFIED_URL}/ocr/health")
355
+
356
+ if response.status_code == 200:
357
+ print(f" βœ… OCR proxy working: health check passed")
358
+ else:
359
+ print(f" ❌ OCR proxy failed: HTTP {response.status_code}")
360
+
361
+ except Exception as e:
362
+ print(f" ❌ OCR proxy error: {e}")
363
+
364
+ async def demo_service_discovery(self):
365
+ """Demonstrate service discovery"""
366
+ self.print_section("Service Discovery")
367
+
368
+ try:
369
+ response = await self.session.get(f"{UNIFIED_URL}/services")
370
+
371
+ if response.status_code == 200:
372
+ data = response.json()
373
+ services = data.get("services", {})
374
+ unified = data.get("unified", {})
375
+
376
+ print(f"πŸ” Service discovery successful:")
377
+ print(f" Unified endpoint: {unified.get('url', 'N/A')}")
378
+
379
+ for service_name, service_info in services.items():
380
+ endpoints = service_info.get("endpoints", [])
381
+ description = service_info.get("description", "No description")
382
+ url = service_info.get("url", "N/A")
383
+
384
+ print(f"\n πŸ“‘ {service_name.upper()} Service:")
385
+ print(f" URL: {url}")
386
+ print(f" Description: {description}")
387
+ print(f" Endpoints: {len(endpoints)} available")
388
+
389
+ # Show a few example endpoints
390
+ for endpoint in endpoints[:3]:
391
+ print(f" β€’ {endpoint}")
392
+ if len(endpoints) > 3:
393
+ print(f" β€’ ... and {len(endpoints) - 3} more")
394
+ else:
395
+ print(f"❌ Service discovery failed: HTTP {response.status_code}")
396
+
397
+ except Exception as e:
398
+ print(f"❌ Service discovery error: {e}")
399
+
400
+ def print_demo_summary(self):
401
+ """Print summary of demo results"""
402
+ self.print_section("Demo Summary")
403
+
404
+ if not self.demo_results:
405
+ print("No analysis results to summarize.")
406
+ return
407
+
408
+ total_entities = 0
409
+ total_relationships = 0
410
+ languages_detected = set()
411
+
412
+ for title, data in self.demo_results.items():
413
+ ner_analysis = data.get("ner_analysis", {})
414
+ if ner_analysis:
415
+ entities = ner_analysis.get("entities", [])
416
+ relationships = ner_analysis.get("relationships", [])
417
+ language = ner_analysis.get("language", "unknown")
418
+
419
+ total_entities += len(entities)
420
+ total_relationships += len(relationships)
421
+ languages_detected.add(language)
422
+
423
+ print(f"πŸ“Š {title}:")
424
+ print(f" Language: {language}")
425
+ print(f" Entities: {len(entities)}")
426
+ print(f" Relationships: {len(relationships)}")
427
+
428
+ print(f"\n🎯 Overall Demo Statistics:")
429
+ print(f" Total analyses: {len(self.demo_results)}")
430
+ print(f" Total entities extracted: {total_entities}")
431
+ print(f" Total relationships found: {total_relationships}")
432
+ print(f" Languages detected: {', '.join(languages_detected)}")
433
+
434
+ print(f"\n✨ Capabilities Demonstrated:")
435
+ print(f" βœ… Multi-language NER analysis (Thai + English)")
436
+ print(f" βœ… Relationship extraction and mapping")
437
+ print(f" βœ… RAG document indexing")
438
+ print(f" βœ… Combined search with NER enhancement")
439
+ print(f" βœ… Service proxy functionality")
440
+ print(f" βœ… Unified workflow coordination")
441
+ print(f" βœ… Real-time processing and analysis")
442
+
443
+ async def run_interactive_demo(self):
444
+ """Run the complete interactive demo"""
445
+ self.print_header("Unified AI Services - Interactive Demo")
446
+
447
+ print("This demo will showcase the capabilities of the unified AI system:")
448
+ print("β€’ Multi-language NER analysis with relationship extraction")
449
+ print("β€’ RAG document indexing and vector search")
450
+ print("β€’ Combined workflows and service coordination")
451
+ print("β€’ Service proxy functionality")
452
+ print("β€’ Real-time health monitoring")
453
+
454
+ # Check system health
455
+ print("\nπŸ” Checking system health...")
456
+ if not await self.check_system_health():
457
+ print("\n❌ Demo cannot proceed - system is not healthy")
458
+ return False
459
+
460
+ # Demo 1: Unified Analysis
461
+ self.print_header("Demo 1: Unified Analysis Capabilities")
462
+
463
+ for title, text in DEMO_TEXTS.items():
464
+ await self.demo_unified_analysis(text, title.replace("_", " ").title())
465
+ # Small delay between analyses
466
+ await asyncio.sleep(1)
467
+
468
+ # Demo 2: Combined Search
469
+ self.print_header("Demo 2: Combined Search with NER Enhancement")
470
+ await self.demo_combined_search()
471
+
472
+ # Demo 3: Service Proxies
473
+ self.print_header("Demo 3: Service Proxy Functionality")
474
+ await self.demo_service_proxies()
475
+
476
+ # Demo 4: Service Discovery
477
+ self.print_header("Demo 4: Service Discovery")
478
+ await self.demo_service_discovery()
479
+
480
+ # Summary
481
+ self.print_header("Demo Complete")
482
+ self.print_demo_summary()
483
+
484
+ print(f"\nπŸŽ‰ Demo completed successfully!")
485
+ print(f"πŸ“– For more information, visit: http://localhost:8000/docs")
486
+
487
+ return True
488
+
489
async def main():
    """Entry point: parse the optional target URL and run the demo.

    An optional first CLI argument overrides the module-level UNIFIED_URL.
    """
    # Declared at the top of the function: the original placed this
    # `global` statement after a local assignment mid-function, which is
    # fragile — any later reference to UNIFIED_URL above the declaration
    # would become a SyntaxError.
    global UNIFIED_URL

    print("🎬 Unified AI Services - Interactive Demo")
    print("=" * 50)

    # e.g. `python demo.py http://other-host:8000`
    if len(sys.argv) > 1:
        UNIFIED_URL = sys.argv[1]

    print(f"🎯 Demo target: {UNIFIED_URL}")
    print("\nMake sure the unified application is running:")
    print("   python app.py")

    # Wait for user confirmation before issuing requests.
    try:
        input("\nPress Enter to start the demo (or Ctrl+C to cancel)...")
    except KeyboardInterrupt:
        print("\nDemo cancelled.")
        return

    async with UnifiedDemo() as demo:
        success = await demo.run_interactive_demo()

        if success:
            print(f"\nπŸ† Demo completed successfully!")
            print(f"The unified AI services are working perfectly.")
        else:
            print(f"\n⚠️ Demo encountered some issues.")
            print(f"Please check the system health and try again.")
519
+
520
if __name__ == "__main__":
    # Run the async demo; exit non-zero on unexpected failure, but treat
    # Ctrl+C as a normal, clean interruption.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n\nπŸ›‘ Demo interrupted by user")
    except Exception as e:
        print(f"\n❌ Demo failed: {e}")
        sys.exit(1)
gettingstart.md ADDED
@@ -0,0 +1,485 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Getting Started with Unified AI Services
2
+
3
+ This guide will walk you through setting up and running the complete Unified AI Services system.
4
+
5
+ ## πŸ“‹ Quick Overview
6
+
7
+ The Unified AI Services system consists of:
8
+ - **NER Service** (Port 8500): Named Entity Recognition with relationship extraction
9
+ - **OCR Service** (Port 8400): Optical Character Recognition with document processing
10
+ - **RAG Service** (Port 8401): Retrieval-Augmented Generation with vector search
11
+ - **Unified App** (Port 8000): Main application coordinating all services
12
+
13
+ ## πŸš€ Quick Start (Recommended)
14
+
15
+ ### Step 1: Automated Setup
16
+
17
+ ```bash
18
+ # Run the automated setup wizard
19
+ python setup.py
20
+ ```
21
+
22
+ This will:
23
+ - βœ… Check your Python environment
24
+ - βœ… Create necessary directories
25
+ - βœ… Help configure your .env file
26
+ - βœ… Install dependencies
27
+ - βœ… Validate configuration
28
+ - βœ… Create startup scripts
29
+
30
+ ### Step 2: Start the System
31
+
32
+ ```bash
33
+ # Start all services automatically
34
+ python app.py
35
+ ```
36
+
37
+ Or use the generated scripts:
38
+ - **Windows**: Double-click `start_services.bat`
39
+ - **Linux/Mac**: Run `./start_services.sh`
40
+
41
+ ### Step 3: Test the System
42
+
43
+ ```bash
44
+ # Run comprehensive tests
45
+ python test.py
46
+ ```
47
+
48
+ Or use the generated scripts:
49
+ - **Windows**: Double-click `run_tests.bat`
50
+ - **Linux/Mac**: Run `./run_tests.sh`
51
+
52
+ ### Step 4: Try the Demo
53
+
54
+ ```bash
55
+ # Run interactive demo
56
+ python demo.py
57
+ ```
58
+
59
+ ## πŸ“ File Structure
60
+
61
+ After setup, your directory should look like this:
62
+
63
+ ```
64
+ unified-ai-services/
65
+ β”œβ”€β”€ app.py # 🌐 Main unified application
66
+ β”œβ”€β”€ configs.py # βš™οΈ Configuration management
67
+ β”œβ”€β”€ setup.py # πŸ› οΈ Automated setup script
68
+ β”œβ”€β”€ manage_services.py # πŸ”§ Service management tool
69
+ β”œβ”€β”€ test.py # πŸ§ͺ Comprehensive test suite
70
+ β”œβ”€β”€ demo.py # 🎬 Interactive demo
71
+ β”œβ”€β”€ requirements.txt # πŸ“¦ Python dependencies
72
+ β”œβ”€β”€ .env # πŸ” Environment configuration
73
+ β”œβ”€β”€ README.md # πŸ“– Documentation
74
+ β”œβ”€β”€ gettingstart.md # πŸš€ This file
75
+ β”œβ”€β”€ services/ # πŸ“‚ Service implementations
76
+ β”‚ β”œβ”€β”€ ner_service.py # Named Entity Recognition
77
+ β”‚ β”œβ”€β”€ ocr_service.py # Optical Character Recognition
78
+ β”‚ └── rag_service.py # Retrieval-Augmented Generation
79
+ β”œβ”€β”€ exports/ # πŸ“ Generated export files
80
+ β”œβ”€β”€ logs/ # πŸ“ Application logs
81
+ └── temp/ # πŸ—‚οΈ Temporary files
82
+ ```
83
+
84
+ ## βš™οΈ Manual Setup (Alternative)
85
+
86
+ If you prefer manual setup:
87
+
88
+ ### Prerequisites
89
+ - Python 3.8 or higher
90
+ - PostgreSQL with vector extension
91
+ - Azure OpenAI account
92
+ - Azure Document Intelligence account
93
+ - DeepSeek API account
94
+
95
+ ### 1. Install Dependencies
96
+
97
+ ```bash
98
+ pip install -r requirements.txt
99
+ ```
100
+
101
+ ### 2. Configure Environment
102
+
103
+ Create a `.env` file with your configuration:
104
+
105
+ ```bash
106
+ # Server Configuration
107
+ HOST=0.0.0.0
108
+ MAIN_PORT=8000
109
+ NER_PORT=8500
110
+ OCR_PORT=8400
111
+ RAG_PORT=8401
112
+
113
+ # PostgreSQL Configuration
114
+ POSTGRES_HOST=your-postgres-server.com
115
+ POSTGRES_PORT=5432
116
+ POSTGRES_USER=your-username
117
+ POSTGRES_PASSWORD=your-password
118
+ POSTGRES_DATABASE=postgres
119
+
120
+ # Azure OpenAI Configuration
121
+ AZURE_OPENAI_ENDPOINT=https://your-openai.openai.azure.com/
122
+ AZURE_OPENAI_API_KEY=your-api-key
123
+ EMBEDDING_MODEL=text-embedding-3-large
124
+
125
+ # DeepSeek Configuration (for advanced NER)
126
+ DEEPSEEK_ENDPOINT=https://your-deepseek-endpoint/
127
+ DEEPSEEK_API_KEY=your-deepseek-key
128
+ DEEPSEEK_MODEL=DeepSeek-R1-0528
129
+
130
+ # Azure Document Intelligence Configuration
131
+ AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://your-di.cognitiveservices.azure.com/
132
+ AZURE_DOCUMENT_INTELLIGENCE_KEY=your-di-key
133
+
134
+ # Azure Storage Configuration
135
+ AZURE_STORAGE_ACCOUNT_URL=https://yourstorage.blob.core.windows.net/
136
+ AZURE_BLOB_SAS_TOKEN=your-sas-token
137
+ BLOB_CONTAINER=historylog
138
+ ```
139
+
140
+ ### 3. Create Directory Structure
141
+
142
+ ```bash
143
+ mkdir -p services exports logs temp tests data
144
+ ```
145
+
146
+ ### 4. Place Service Files
147
+
148
+ Ensure your service files are in the correct locations:
149
+ - `services/ner_service.py`
150
+ - `services/ocr_service.py`
151
+ - `services/rag_service.py`
152
+
153
+ ## πŸ”§ Service Management
154
+
155
+ ### Using the Service Manager
156
+
157
+ The `manage_services.py` script provides easy service management:
158
+
159
+ ```bash
160
+ # Start individual services
161
+ python manage_services.py start ner
162
+ python manage_services.py start ocr
163
+ python manage_services.py start rag
164
+ python manage_services.py start unified
165
+
166
+ # Start all services
167
+ python manage_services.py start all
168
+
169
+ # Check status
170
+ python manage_services.py status
171
+
172
+ # Test services
173
+ python manage_services.py test ner
174
+ python manage_services.py test all
175
+
176
+ # Stop services
177
+ python manage_services.py stop all
178
+
179
+ # Restart services
180
+ python manage_services.py restart all
181
+
182
+ # List available services
183
+ python manage_services.py list
184
+ ```
185
+
186
+ ### Direct Service Management
187
+
188
+ Start services individually for development:
189
+
190
+ ```bash
191
+ # Terminal 1: Start OCR service
192
+ cd services && python ocr_service.py
193
+
194
+ # Terminal 2: Start RAG service
195
+ cd services && python rag_service.py
196
+
197
+ # Terminal 3: Start NER service
198
+ cd services && python ner_service.py
199
+
200
+ # Terminal 4: Start unified application
201
+ python app.py
202
+ ```
203
+
204
+ ## πŸ§ͺ Testing and Validation
205
+
206
+ ### Comprehensive System Tests
207
+
208
+ ```bash
209
+ # Run all tests
210
+ python test.py
211
+
212
+ # Test output will show:
213
+ # βœ… Unified App Health Check
214
+ # βœ… Individual Service Health
215
+ # βœ… Unified Analysis (Text)
216
+ # βœ… Unified Analysis (URL)
217
+ # βœ… Combined Search
218
+ # βœ… Service Proxies
219
+ # βœ… File Upload (Unified)
220
+ # βœ… Service Discovery
221
+ # βœ… System Performance
222
+ # βœ… Error Handling
223
+ ```
224
+
225
+ ### Individual Service Tests
226
+
227
+ ```bash
228
+ # Test NER service specifically
229
+ python test_ner.py
230
+
231
+ # Test RAG service specifically
232
+ python test_rag.py
233
+ ```
234
+
235
+ ### Quick Health Checks
236
+
237
+ ```bash
238
+ # Check unified system
239
+ curl http://localhost:8000/health
240
+
241
+ # Check individual services
242
+ curl http://localhost:8500/health # NER
243
+ curl http://localhost:8400/health # OCR
244
+ curl http://localhost:8401/health # RAG
245
+ ```
246
+
247
+ ## 🎬 Interactive Demo
248
+
249
+ The demo script showcases all system capabilities:
250
+
251
+ ```bash
252
+ python demo.py
253
+ ```
254
+
255
+ Demo includes:
256
+ - Multi-language text analysis (Thai + English)
257
+ - Entity and relationship extraction
258
+ - RAG document indexing
259
+ - Combined search functionality
260
+ - Service proxy testing
261
+ - Real-time performance monitoring
262
+
263
+ ## 🌐 API Usage
264
+
265
+ ### API Documentation
266
+
267
+ Once running, access interactive documentation:
268
+ - **Unified API**: http://localhost:8000/docs
269
+ - **NER Service**: http://localhost:8500/docs
270
+ - **OCR Service**: http://localhost:8400/docs
271
+ - **RAG Service**: http://localhost:8401/docs
272
+
273
+ ### Key Endpoints
274
+
275
+ #### Unified Analysis
276
+ ```python
277
+ # Analyze text with automatic RAG indexing
278
+ POST /analyze/unified
279
+ {
280
+ "text": "Your text here...",
281
+ "extract_relationships": true,
282
+ "enable_rag_indexing": true,
283
+ "rag_title": "Document Title"
284
+ }
285
+ ```
286
+
287
+ #### Combined Search
288
+ ```python
289
+ # Search with automatic NER enhancement
290
+ POST /search/combined
291
+ {
292
+ "query": "search terms",
293
+ "include_ner_analysis": true,
294
+ "limit": 10
295
+ }
296
+ ```
297
+
298
+ #### Service Proxies
299
+ ```python
300
+ # Direct access to individual services
301
+ POST /ner/analyze/text # NER analysis
302
+ POST /ocr/upload # OCR processing
303
+ POST /rag/search # RAG search
304
+ GET /rag/documents # List documents
305
+ ```
306
+
307
+ ## πŸ” Health Monitoring
308
+
309
+ ### System Status
310
+
311
+ ```bash
312
+ # Get overall system health
313
+ GET /health
314
+
315
+ # Get detailed status
316
+ GET /status
317
+
318
+ # Discover available services
319
+ GET /services
320
+ ```
321
+
322
+ ### Service Monitoring
323
+
324
+ Each service provides health information:
325
+ - Response times
326
+ - Uptime
327
+ - Resource usage
328
+ - Configuration status
329
+ - Error rates
330
+
331
+ ## πŸ› οΈ Troubleshooting
332
+
333
+ ### Common Issues
334
+
335
+ #### 1. Services Won't Start
336
+
337
+ **Check ports:**
338
+ ```bash
339
+ netstat -an | grep :8000
340
+ netstat -an | grep :8500
341
+ netstat -an | grep :8400
342
+ netstat -an | grep :8401
343
+ ```
344
+
345
+ **Verify configuration:**
346
+ ```bash
347
+ python configs.py
348
+ ```
349
+
350
+ **Check dependencies:**
351
+ ```bash
352
+ pip list | grep fastapi
353
+ pip list | grep asyncpg
354
+ ```
355
+
356
+ #### 2. Database Connection Issues
357
+
358
+ **Test connection:**
359
+ ```bash
360
+ # Use your actual connection details
361
+ python -c "
362
+ import asyncio
363
+ import asyncpg
364
+
365
+ async def test():
366
+ conn = await asyncpg.connect('postgresql://user:pass@host:5432/db')
367
+ print('Connected successfully')
368
+ await conn.close()
369
+
370
+ asyncio.run(test())
371
+ "
372
+ ```
373
+
374
+ **Common fixes:**
375
+ - Verify PostgreSQL is running
376
+ - Check firewall rules
377
+ - Confirm SSL requirements
378
+ - Validate credentials
379
+
380
+ #### 3. Azure Service Issues
381
+
382
+ **Check API keys:**
383
+ ```bash
384
+ # Test Azure OpenAI
385
+ curl -H "api-key: YOUR_KEY" "YOUR_ENDPOINT/openai/deployments/YOUR_MODEL/embeddings?api-version=2024-02-01"
386
+
387
+ # Test Document Intelligence
388
+ curl -H "Ocp-Apim-Subscription-Key: YOUR_KEY" "YOUR_ENDPOINT/formrecognizer/info?api-version=2023-07-31"
389
+ ```
390
+
391
+ **Common fixes:**
392
+ - Verify API keys are correct
393
+ - Check service regions
394
+ - Confirm quota limits
395
+ - Validate endpoint URLs
396
+
397
+ #### 4. Performance Issues
398
+
399
+ **Monitor resources:**
400
+ ```bash
401
+ # Check system resources
402
+ top
403
+ htop
404
+ python manage_services.py status
405
+ ```
406
+
407
+ **Common solutions:**
408
+ - Increase system memory
409
+ - Optimize database queries
410
+ - Reduce concurrent requests
411
+ - Check network latency
412
+
413
+ ### Getting Help
414
+
415
+ 1. **Check logs**: Services log to console
416
+ 2. **Run health checks**: Use `/health` endpoints
417
+ 3. **Validate configuration**: Run `python configs.py`
418
+ 4. **Test individual services**: Use service manager
419
+ 5. **Check database connectivity**: Test connection strings
420
+ 6. **Verify Azure services**: Check API endpoints
421
+
422
+ ### Debug Mode
423
+
424
+ Enable debug mode for detailed logging:
425
+
426
+ ```bash
427
+ # In .env file
428
+ DEBUG=True
429
+
430
+ # Or set environment variable
431
+ export DEBUG=true
432
+ python app.py
433
+ ```
434
+
435
+ ## πŸš€ Production Deployment
436
+
437
+ ### Security Considerations
438
+
439
+ 1. **Environment Variables**: Use secure secret management
440
+ 2. **HTTPS**: Enable SSL/TLS in production
441
+ 3. **Authentication**: Implement API authentication
442
+ 4. **Rate Limiting**: Add request rate limiting
443
+ 5. **Input Validation**: Validate all input data
444
+
445
+ ### Performance Optimization
446
+
447
+ 1. **Caching**: Implement Redis caching
448
+ 2. **Load Balancing**: Use reverse proxy (nginx)
449
+ 3. **Database**: Optimize PostgreSQL configuration
450
+ 4. **Monitoring**: Set up application monitoring
451
+ 5. **Scaling**: Consider horizontal scaling
452
+
453
+ ### Deployment Options
454
+
455
+ 1. **Docker**: Containerize services
456
+ 2. **Cloud**: Deploy to Azure/AWS/GCP
457
+ 3. **Kubernetes**: Orchestrate with k8s
458
+ 4. **CI/CD**: Automate deployments
459
+
460
+ ## πŸ“ž Next Steps
461
+
462
+ After successful setup:
463
+
464
+ 1. **Explore the API**: Use the interactive documentation
465
+ 2. **Try the demo**: Run `python demo.py`
466
+ 3. **Run tests**: Execute `python test.py`
467
+ 4. **Monitor system**: Check health endpoints
468
+ 5. **Customize**: Modify services for your needs
469
+ 6. **Scale**: Consider production deployment
470
+
471
+ ## 🎯 Success Indicators
472
+
473
+ You know the system is working when:
474
+ - βœ… All health checks pass
475
+ - βœ… Tests complete successfully
476
+ - βœ… Demo runs without errors
477
+ - βœ… API documentation is accessible
478
+ - βœ… Services respond to requests
479
+ - βœ… Database connections work
480
+ - βœ… Azure integrations function
481
+ - βœ… File uploads process correctly
482
+ - βœ… Search returns results
483
+ - βœ… Export files generate properly
484
+
485
+ **Congratulations! Your Unified AI Services system is ready to use! πŸŽ‰**
manage_services.py ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Service Management Tool for Unified AI Services
4
+ Helps start, stop, monitor, and troubleshoot individual services
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import time
10
+ import signal
11
+ import subprocess
12
+ import asyncio
13
+ import json
14
+ from pathlib import Path
15
+ from typing import Dict, List, Optional, Tuple
16
+ import argparse
17
+
18
+ import httpx
19
+ import psutil
20
+
21
+ # Import configuration if available
22
+ try:
23
+ from configs import get_config, validate_environment
24
+ config = get_config()
25
+ except ImportError:
26
+ print("⚠️ Could not import configs. Using default values.")
27
+ config = None
28
+
29
+ class ServiceManager:
30
+ """Manages individual services for development and troubleshooting"""
31
+
32
+ def __init__(self):
33
+ self.processes: Dict[str, subprocess.Popen] = {}
34
+ self.service_configs = {
35
+ "ner": {
36
+ "script": "services/ner_service.py",
37
+ "port": 8500,
38
+ "description": "Named Entity Recognition with relationship extraction"
39
+ },
40
+ "ocr": {
41
+ "script": "services/ocr_service.py",
42
+ "port": 8400,
43
+ "description": "Optical Character Recognition with document processing"
44
+ },
45
+ "rag": {
46
+ "script": "services/rag_service.py",
47
+ "port": 8401,
48
+ "description": "Retrieval-Augmented Generation with vector search"
49
+ },
50
+ "unified": {
51
+ "script": "app.py",
52
+ "port": 8000,
53
+ "description": "Unified application coordinating all services"
54
+ }
55
+ }
56
+
57
+ # Update ports from config if available
58
+ if config:
59
+ self.service_configs["ner"]["port"] = config.ner.PORT
60
+ self.service_configs["ocr"]["port"] = config.ocr.PORT
61
+ self.service_configs["rag"]["port"] = config.rag.PORT
62
+ self.service_configs["unified"]["port"] = config.MAIN_PORT
63
+
64
+ def print_header(self, title: str):
65
+ """Print formatted header"""
66
+ print("\n" + "=" * 60)
67
+ print(f" {title}")
68
+ print("=" * 60)
69
+
70
+ def print_service_info(self, service_name: str):
71
+ """Print service information"""
72
+ if service_name not in self.service_configs:
73
+ return
74
+
75
+ service = self.service_configs[service_name]
76
+ print(f"πŸ“ {service_name.upper()} Service")
77
+ print(f" Description: {service['description']}")
78
+ print(f" Script: {service['script']}")
79
+ print(f" Port: {service['port']}")
80
+ print(f" URL: http://localhost:{service['port']}")
81
+
82
+ def is_port_in_use(self, port: int) -> bool:
83
+ """Check if port is in use"""
84
+ try:
85
+ for conn in psutil.net_connections():
86
+ if conn.laddr.port == port:
87
+ return True
88
+ return False
89
+ except:
90
+ return False
91
+
92
+ async def check_service_health(self, service_name: str) -> Tuple[bool, Optional[Dict]]:
93
+ """Check service health"""
94
+ if service_name not in self.service_configs:
95
+ return False, None
96
+
97
+ port = self.service_configs[service_name]["port"]
98
+
99
+ try:
100
+ async with httpx.AsyncClient() as client:
101
+ response = await client.get(
102
+ f"http://localhost:{port}/health",
103
+ timeout=5.0
104
+ )
105
+ if response.status_code == 200:
106
+ return True, response.json()
107
+ else:
108
+ return False, {"error": f"HTTP {response.status_code}"}
109
+ except Exception as e:
110
+ return False, {"error": str(e)}
111
+
112
+ def start_service(self, service_name: str) -> bool:
113
+ """Start a specific service"""
114
+ if service_name not in self.service_configs:
115
+ print(f"❌ Unknown service: {service_name}")
116
+ return False
117
+
118
+ service = self.service_configs[service_name]
119
+ script_path = service["script"]
120
+ port = service["port"]
121
+
122
+ # Check if script exists
123
+ if not Path(script_path).exists():
124
+ print(f"❌ Service script not found: {script_path}")
125
+ return False
126
+
127
+ # Check if port is already in use
128
+ if self.is_port_in_use(port):
129
+ print(f"⚠️ Port {port} is already in use. Service may already be running.")
130
+ return False
131
+
132
+ # Check if service is already running in our process list
133
+ if service_name in self.processes:
134
+ process = self.processes[service_name]
135
+ if process.poll() is None: # Process is still running
136
+ print(f"⚠️ {service_name} service is already running (PID: {process.pid})")
137
+ return False
138
+
139
+ try:
140
+ print(f"πŸš€ Starting {service_name} service...")
141
+ print(f" Script: {script_path}")
142
+ print(f" Port: {port}")
143
+
144
+ # Start the service
145
+ if sys.platform == "win32":
146
+ process = subprocess.Popen([
147
+ sys.executable, script_path
148
+ ], creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
149
+ else:
150
+ process = subprocess.Popen([
151
+ sys.executable, script_path
152
+ ], preexec_fn=os.setsid)
153
+
154
+ self.processes[service_name] = process
155
+
156
+ # Wait a moment for startup
157
+ time.sleep(2)
158
+
159
+ # Check if process is still running
160
+ if process.poll() is None:
161
+ print(f"βœ… {service_name} service started successfully (PID: {process.pid})")
162
+ return True
163
+ else:
164
+ print(f"❌ {service_name} service failed to start")
165
+ return False
166
+
167
+ except Exception as e:
168
+ print(f"❌ Failed to start {service_name} service: {e}")
169
+ return False
170
+
171
+ def stop_service(self, service_name: str) -> bool:
172
+ """Stop a specific service"""
173
+ if service_name not in self.service_configs:
174
+ print(f"❌ Unknown service: {service_name}")
175
+ return False
176
+
177
+ port = self.service_configs[service_name]["port"]
178
+
179
+ # Try to stop our managed process first
180
+ if service_name in self.processes:
181
+ process = self.processes[service_name]
182
+ if process.poll() is None: # Process is still running
183
+ try:
184
+ print(f"πŸ›‘ Stopping {service_name} service (PID: {process.pid})...")
185
+
186
+ if sys.platform == "win32":
187
+ process.send_signal(signal.CTRL_BREAK_EVENT)
188
+ else:
189
+ os.killpg(os.getpgid(process.pid), signal.SIGTERM)
190
+
191
+ # Wait for graceful shutdown
192
+ try:
193
+ process.wait(timeout=10)
194
+ print(f"βœ… {service_name} service stopped")
195
+ del self.processes[service_name]
196
+ return True
197
+ except subprocess.TimeoutExpired:
198
+ print(f"⚠️ Force killing {service_name} service...")
199
+ process.kill()
200
+ del self.processes[service_name]
201
+ return True
202
+
203
+ except Exception as e:
204
+ print(f"❌ Error stopping {service_name} service: {e}")
205
+ return False
206
+
207
+ # Try to find and stop any process using the port
208
+ try:
209
+ for proc in psutil.process_iter(['pid', 'name', 'connections']):
210
+ try:
211
+ for conn in proc.info['connections'] or []:
212
+ if conn.laddr.port == port:
213
+ print(f"πŸ›‘ Found process using port {port} (PID: {proc.pid})")
214
+ proc.terminate()
215
+ try:
216
+ proc.wait(timeout=5)
217
+ print(f"βœ… Process {proc.pid} terminated")
218
+ return True
219
+ except psutil.TimeoutExpired:
220
+ proc.kill()
221
+ print(f"βœ… Process {proc.pid} killed")
222
+ return True
223
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
224
+ continue
225
+ except Exception as e:
226
+ print(f"❌ Error finding process on port {port}: {e}")
227
+
228
+ print(f"⚠️ No running {service_name} service found")
229
+ return False
230
+
231
+ def stop_all_services(self):
232
+ """Stop all managed services"""
233
+ print("πŸ›‘ Stopping all services...")
234
+
235
+ for service_name in self.service_configs.keys():
236
+ self.stop_service(service_name)
237
+
238
+ async def get_service_status(self, service_name: str) -> Dict:
239
+ """Get detailed service status"""
240
+ if service_name not in self.service_configs:
241
+ return {"status": "unknown", "error": "Unknown service"}
242
+
243
+ service = self.service_configs[service_name]
244
+ port = service["port"]
245
+
246
+ status = {
247
+ "name": service_name,
248
+ "description": service["description"],
249
+ "port": port,
250
+ "script": service["script"],
251
+ "managed_process": False,
252
+ "port_in_use": self.is_port_in_use(port),
253
+ "health_check": False,
254
+ "health_data": None
255
+ }
256
+
257
+ # Check if we have a managed process
258
+ if service_name in self.processes:
259
+ process = self.processes[service_name]
260
+ if process.poll() is None:
261
+ status["managed_process"] = True
262
+ status["pid"] = process.pid
263
+ try:
264
+ proc = psutil.Process(process.pid)
265
+ status["cpu_percent"] = proc.cpu_percent()
266
+ status["memory_mb"] = proc.memory_info().rss / 1024 / 1024
267
+ status["create_time"] = proc.create_time()
268
+ status["uptime"] = time.time() - proc.create_time()
269
+ except:
270
+ pass
271
+
272
+ # Check health endpoint
273
+ health_ok, health_data = await self.check_service_health(service_name)
274
+ status["health_check"] = health_ok
275
+ status["health_data"] = health_data
276
+
277
+ return status
278
+
279
+ async def status_all_services(self):
280
+ """Show status of all services"""
281
+ self.print_header("Service Status Overview")
282
+
283
+ for service_name in self.service_configs.keys():
284
+ status = await self.get_service_status(service_name)
285
+
286
+ print(f"\nπŸ“Š {service_name.upper()} Service")
287
+ print(f" Port: {status['port']}")
288
+ print(f" Script: {status['script']}")
289
+
290
+ if status["managed_process"]:
291
+ print(f" βœ… Managed process running (PID: {status.get('pid', 'unknown')})")
292
+ if 'uptime' in status:
293
+ uptime_str = f"{status['uptime']:.0f} seconds"
294
+ print(f" ⏱️ Uptime: {uptime_str}")
295
+ if 'cpu_percent' in status:
296
+ print(f" πŸ’» CPU: {status['cpu_percent']:.1f}%")
297
+ if 'memory_mb' in status:
298
+ print(f" 🧠 Memory: {status['memory_mb']:.1f} MB")
299
+ elif status["port_in_use"]:
300
+ print(f" ⚠️ Port in use (external process)")
301
+ else:
302
+ print(f" ❌ Not running")
303
+
304
+ if status["health_check"]:
305
+ print(f" βœ… Health check: OK")
306
+ if status["health_data"]:
307
+ health = status["health_data"]
308
+ if isinstance(health, dict) and "status" in health:
309
+ print(f" Status: {health['status']}")
310
+ else:
311
+ print(f" ❌ Health check: Failed")
312
+ if status["health_data"] and "error" in status["health_data"]:
313
+ print(f" Error: {status['health_data']['error']}")
314
+
315
+ async def test_service(self, service_name: str):
316
+ """Test a specific service"""
317
+ if service_name not in self.service_configs:
318
+ print(f"❌ Unknown service: {service_name}")
319
+ return
320
+
321
+ self.print_header(f"Testing {service_name.upper()} Service")
322
+
323
+ status = await self.get_service_status(service_name)
324
+
325
+ # Basic status
326
+ if not status["port_in_use"]:
327
+ print("❌ Service is not running")
328
+ return
329
+
330
+ if not status["health_check"]:
331
+ print("❌ Health check failed")
332
+ if status["health_data"]:
333
+ print(f" Error: {status['health_data']}")
334
+ return
335
+
336
+ print("βœ… Service is running and healthy")
337
+
338
+ # Service-specific tests
339
+ port = status["port"]
340
+
341
+ if service_name == "ner":
342
+ await self.test_ner_service(port)
343
+ elif service_name == "ocr":
344
+ await self.test_ocr_service(port)
345
+ elif service_name == "rag":
346
+ await self.test_rag_service(port)
347
+ elif service_name == "unified":
348
+ await self.test_unified_service(port)
349
+
350
+ async def test_ner_service(self, port: int):
351
+ """Test NER service functionality"""
352
+ print("\nπŸ§ͺ Testing NER functionality...")
353
+
354
+ try:
355
+ test_data = {
356
+ "text": "John Smith works at Microsoft in Seattle.",
357
+ "extract_relationships": True,
358
+ "include_embeddings": False,
359
+ "generate_graph_files": False
360
+ }
361
+
362
+ async with httpx.AsyncClient() as client:
363
+ response = await client.post(
364
+ f"http://localhost:{port}/analyze/text",
365
+ json=test_data,
366
+ timeout=30.0
367
+ )
368
+
369
+ if response.status_code == 200:
370
+ result = response.json()
371
+ if result.get("success"):
372
+ entities = result.get("entities", [])
373
+ relationships = result.get("relationships", [])
374
+ print(f" βœ… NER analysis successful")
375
+ print(f" πŸ“Š Found {len(entities)} entities, {len(relationships)} relationships")
376
+ else:
377
+ print(f" ❌ NER analysis failed: {result.get('error', 'Unknown error')}")
378
+ else:
379
+ print(f" ❌ NER test failed: HTTP {response.status_code}")
380
+
381
+ except Exception as e:
382
+ print(f" ❌ NER test error: {e}")
383
+
384
+ async def test_ocr_service(self, port: int):
385
+ """Test OCR service functionality"""
386
+ print("\nπŸ§ͺ Testing OCR functionality...")
387
+
388
+ try:
389
+ async with httpx.AsyncClient() as client:
390
+ # Test health endpoint (OCR doesn't have complex test without files)
391
+ response = await client.get(f"http://localhost:{port}/health")
392
+
393
+ if response.status_code == 200:
394
+ print(" βœ… OCR service is responsive")
395
+ else:
396
+ print(f" ❌ OCR test failed: HTTP {response.status_code}")
397
+
398
+ except Exception as e:
399
+ print(f" ❌ OCR test error: {e}")
400
+
401
+ async def test_rag_service(self, port: int):
402
+ """Test RAG service functionality"""
403
+ print("\nπŸ§ͺ Testing RAG functionality...")
404
+
405
+ try:
406
+ async with httpx.AsyncClient() as client:
407
+ # Test document listing
408
+ response = await client.get(f"http://localhost:{port}/documents?limit=5")
409
+
410
+ if response.status_code == 200:
411
+ result = response.json()
412
+ documents = result.get("documents", [])
413
+ print(f" βœ… RAG service is responsive")
414
+ print(f" πŸ“Š Found {len(documents)} documents in database")
415
+ else:
416
+ print(f" ❌ RAG test failed: HTTP {response.status_code}")
417
+
418
+ except Exception as e:
419
+ print(f" ❌ RAG test error: {e}")
420
+
421
+ async def test_unified_service(self, port: int):
422
+ """Test unified service functionality"""
423
+ print("\nπŸ§ͺ Testing Unified functionality...")
424
+
425
+ try:
426
+ async with httpx.AsyncClient() as client:
427
+ # Test service discovery
428
+ response = await client.get(f"http://localhost:{port}/services")
429
+
430
+ if response.status_code == 200:
431
+ result = response.json()
432
+ services = result.get("services", {})
433
+ print(f" βœ… Unified service is responsive")
434
+ print(f" πŸ“Š Discovered {len(services)} services")
435
+ else:
436
+ print(f" ❌ Unified test failed: HTTP {response.status_code}")
437
+
438
+ except Exception as e:
439
+ print(f" ❌ Unified test error: {e}")
440
+
441
+ def list_services(self):
442
+ """List all available services"""
443
+ self.print_header("Available Services")
444
+
445
+ for service_name, service in self.service_configs.items():
446
+ print(f"\nπŸ“ {service_name}")
447
+ print(f" Description: {service['description']}")
448
+ print(f" Script: {service['script']}")
449
+ print(f" Port: {service['port']}")
450
+ print(f" URL: http://localhost:{service['port']}")
451
+
452
async def main():
    """Command line interface: start/stop/restart/status/test/list services.

    Improvement: the dependency-ordered startup sequence was duplicated
    between the "start all" and "restart all" branches; it is now shared
    via a local helper.
    """
    parser = argparse.ArgumentParser(
        description="Service Management Tool for Unified AI Services",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python manage_services.py start ner     # Start NER service
  python manage_services.py stop all      # Stop all services
  python manage_services.py status        # Show status of all services
  python manage_services.py test rag      # Test RAG service
  python manage_services.py list          # List available services
        """
    )

    parser.add_argument(
        "action",
        choices=["start", "stop", "restart", "status", "test", "list"],
        help="Action to perform"
    )

    parser.add_argument(
        "service",
        nargs="?",
        choices=["ner", "ocr", "rag", "unified", "all"],
        help="Service to act on (use 'all' for all services)"
    )

    args = parser.parse_args()

    manager = ServiceManager()

    # Actions that don't need a service argument.
    if args.action == "list":
        manager.list_services()
        return

    if args.action == "status":
        await manager.status_all_services()
        return

    if not args.service:
        print("❌ Service argument is required for this action")
        parser.print_help()
        return

    # Dependency order: backends first, unified app last.
    startup_order = ["ocr", "rag", "ner", "unified"]

    def _start_in_order():
        """Start all services in dependency order, staggering startups."""
        for service in startup_order:
            if manager.start_service(service):
                time.sleep(3)  # let each service settle before the next
            else:
                print(f"⚠️ Failed to start {service}, continuing with other services...")

    if args.action == "start":
        if args.service == "all":
            _start_in_order()
        else:
            manager.start_service(args.service)

    elif args.action == "stop":
        if args.service == "all":
            manager.stop_all_services()
        else:
            manager.stop_service(args.service)

    elif args.action == "restart":
        if args.service == "all":
            print("🔄 Restarting all services...")
            manager.stop_all_services()
            time.sleep(2)
            _start_in_order()
        else:
            print(f"🔄 Restarting {args.service} service...")
            manager.stop_service(args.service)
            time.sleep(2)
            manager.start_service(args.service)

    elif args.action == "test":
        if args.service == "all":
            for service_name in manager.service_configs:
                await manager.test_service(service_name)
                print()  # spacing between tests
        else:
            await manager.test_service(args.service)
542
+
543
if __name__ == "__main__":
    # Top-level guard: run the async CLI and translate failures to exit codes.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n🛑 Operation cancelled by user")
    except Exception as exc:
        print(f"\n❌ Error: {exc}")
        sys.exit(1)
requirements.txt ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Unified AI Services - Python Dependencies
2
+ # Core framework dependencies
3
+ fastapi>=0.104.1
4
+ uvicorn[standard]>=0.24.0
5
+ pydantic>=2.5.0
6
+ python-multipart>=0.0.6
7
+
8
+ # HTTP client and async support
9
+ httpx>=0.25.0
10
+ aiofiles>=23.2.1
11
+
12
+ # Database dependencies
13
+ asyncpg>=0.29.0
14
+ psycopg2-binary>=2.9.7
15
+
16
+ # Azure services
17
+ azure-ai-inference>=1.0.0
18
+ azure-core>=1.29.0
19
+ azure-storage-blob>=12.19.0
20
+ azure-ai-documentintelligence>=1.0.0
21
+
22
+ # OpenAI integration
23
+ openai>=1.3.0
24
+
25
+ # Document processing
26
+ python-docx>=1.1.0
27
+ beautifulsoup4>=4.12.0
28
+ lxml>=4.9.0
29
+ Pillow>=10.0.0
30
+
31
+ # Utilities
32
+ requests>=2.31.0
33
+ numpy>=1.24.0
34
+ python-dotenv>=1.0.0
35
+ psutil>=5.9.0
36
+
37
+ # Development and testing (optional)
38
+ pytest>=7.4.0
39
+ pytest-asyncio>=0.21.0
40
+ black>=23.0.0
41
+ flake8>=6.0.0
42
+
43
+ # Additional data processing
44
+ pandas>=2.0.0
45
+ scikit-learn>=1.3.0
setup.py ADDED
@@ -0,0 +1,511 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Automated Setup and Configuration for Unified AI Services
4
+ Helps set up the environment, validate configurations, and initialize services
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import json
10
+ import asyncio
11
+ import subprocess
12
+ from pathlib import Path
13
+ from typing import Dict, List, Optional
14
+ import shutil
15
+
16
def print_header(title: str):
    """Print a formatted header"""
    bar = "=" * 60
    print(f"\n{bar}\n {title}\n{bar}")
21
+
22
def print_step(step: str):
    """Print a step indicator"""
    print("\n🔧 " + step)
25
+
26
def check_python_version():
    """Check Python version compatibility.

    Returns True when the interpreter is Python 3.8 or newer, otherwise
    prints an error and returns False.
    """
    print_step("Checking Python version...")

    version = sys.version_info
    # Idiom fix: compare version_info against a tuple instead of spelling
    # out the major/minor arithmetic by hand.
    if version < (3, 8):
        print("❌ Python 3.8 or higher is required")
        print(f"   Current version: {version.major}.{version.minor}.{version.micro}")
        return False

    print(f"✅ Python {version.major}.{version.minor}.{version.micro} is compatible")
    return True
38
+
39
def create_directory_structure():
    """Create necessary directory structure"""
    print_step("Creating directory structure...")

    for directory in ("services", "exports", "logs", "temp", "tests", "data"):
        path = Path(directory)
        if path.exists():
            print(f"   ✓ Directory exists: {directory}")
        else:
            path.mkdir(parents=True, exist_ok=True)
            print(f"   ✅ Created directory: {directory}")
59
+
60
def check_service_files():
    """Check if service files exist"""
    print_step("Checking service files...")

    required_files = {
        "services/ner_service.py": "NER Service",
        "services/ocr_service.py": "OCR Service",
        "services/rag_service.py": "RAG Service",
        "app.py": "Unified Application",
        "configs.py": "Configuration Management",
    }

    missing_files = [p for p in required_files if not Path(p).exists()]

    for file_path, description in required_files.items():
        if file_path in missing_files:
            print(f"   ❌ {description}: {file_path} (MISSING)")
        else:
            print(f"   ✅ {description}: {file_path}")

    if missing_files:
        print(f"\n⚠️ Missing files detected:")
        for file_path in missing_files:
            print(f"   - {file_path}")
        print("\nPlease ensure all service files are in the correct locations.")
        return False

    return True
89
+
90
def create_env_file():
    """Create or update the .env file from interactive user input.

    Returns True on success or when the user keeps an existing file.

    Fix: the RAG section previously wrote shell-style placeholders such as
    ``PG_HOST=${POSTGRES_HOST}``; python-dotenv does not expand those by
    default, so the concrete values collected from the user are written
    instead.
    """
    print_step("Setting up environment configuration...")

    env_path = Path(".env")

    if env_path.exists():
        response = input("   .env file already exists. Overwrite? (y/N): ")
        if response.lower() != 'y':
            print("   Keeping existing .env file")
            return True

    print("\n📝 Please provide the following configuration values:")
    print("   (Press Enter to use default values shown in brackets)")

    config_values = {}

    print("\n🌐 Server Configuration:")
    config_values['HOST'] = input("   Host [0.0.0.0]: ") or "0.0.0.0"
    config_values['DEBUG'] = input("   Debug mode (true/false) [True]: ") or "True"
    config_values['MAIN_PORT'] = input("   Main app port [8000]: ") or "8000"
    config_values['NER_PORT'] = input("   NER service port [8500]: ") or "8500"
    config_values['OCR_PORT'] = input("   OCR service port [8400]: ") or "8400"
    config_values['RAG_PORT'] = input("   RAG service port [8401]: ") or "8401"

    print("\n🗄️ PostgreSQL Configuration:")
    config_values['POSTGRES_HOST'] = input("   PostgreSQL host: ")
    config_values['POSTGRES_PORT'] = input("   PostgreSQL port [5432]: ") or "5432"
    config_values['POSTGRES_USER'] = input("   PostgreSQL user: ")
    config_values['POSTGRES_PASSWORD'] = input("   PostgreSQL password: ")
    config_values['POSTGRES_DATABASE'] = input("   PostgreSQL database [postgres]: ") or "postgres"

    print("\n🤖 Azure OpenAI Configuration:")
    config_values['AZURE_OPENAI_ENDPOINT'] = input("   Azure OpenAI endpoint: ")
    config_values['AZURE_OPENAI_API_KEY'] = input("   Azure OpenAI API key: ")
    config_values['EMBEDDING_MODEL'] = input("   Embedding model [text-embedding-3-large]: ") or "text-embedding-3-large"

    print("\n🧠 DeepSeek Configuration:")
    config_values['DEEPSEEK_ENDPOINT'] = input("   DeepSeek endpoint: ")
    config_values['DEEPSEEK_API_KEY'] = input("   DeepSeek API key: ")
    config_values['DEEPSEEK_MODEL'] = input("   DeepSeek model [DeepSeek-R1-0528]: ") or "DeepSeek-R1-0528"

    print("\n📄 Azure Document Intelligence Configuration:")
    config_values['AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT'] = input("   Document Intelligence endpoint: ")
    config_values['AZURE_DOCUMENT_INTELLIGENCE_KEY'] = input("   Document Intelligence API key: ")

    print("\n💾 Azure Storage Configuration:")
    config_values['AZURE_STORAGE_ACCOUNT_URL'] = input("   Storage account URL: ")
    config_values['AZURE_BLOB_SAS_TOKEN'] = input("   Blob SAS token: ")
    config_values['BLOB_CONTAINER'] = input("   Blob container [historylog]: ") or "historylog"

    try:
        cv = config_values
        lines = [
            "# =================================================================",
            "# Unified AI Services - Environment Configuration",
            "# Generated by setup.py",
            "# =================================================================",
            "",
            "# Server Configuration",
            f"HOST={cv['HOST']}",
            f"DEBUG={cv['DEBUG']}",
            f"MAIN_PORT={cv['MAIN_PORT']}",
            f"NER_PORT={cv['NER_PORT']}",
            f"OCR_PORT={cv['OCR_PORT']}",
            f"RAG_PORT={cv['RAG_PORT']}",
            "",
            "# PostgreSQL Configuration",
            f"POSTGRES_HOST={cv['POSTGRES_HOST']}",
            f"POSTGRES_PORT={cv['POSTGRES_PORT']}",
            f"POSTGRES_USER={cv['POSTGRES_USER']}",
            f"POSTGRES_PASSWORD={cv['POSTGRES_PASSWORD']}",
            f"POSTGRES_DATABASE={cv['POSTGRES_DATABASE']}",
            "",
            "# Azure OpenAI Configuration",
            f"AZURE_OPENAI_ENDPOINT={cv['AZURE_OPENAI_ENDPOINT']}",
            f"AZURE_OPENAI_API_KEY={cv['AZURE_OPENAI_API_KEY']}",
            f"EMBEDDING_MODEL={cv['EMBEDDING_MODEL']}",
            f"AZURE_OPENAI_DEPLOYMENT_NAME={cv['EMBEDDING_MODEL']}",
            "",
            "# DeepSeek Configuration",
            f"DEEPSEEK_ENDPOINT={cv['DEEPSEEK_ENDPOINT']}",
            f"DEEPSEEK_API_KEY={cv['DEEPSEEK_API_KEY']}",
            f"DEEPSEEK_MODEL={cv['DEEPSEEK_MODEL']}",
            "",
            "# Azure Document Intelligence Configuration",
            f"AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT={cv['AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT']}",
            f"AZURE_DOCUMENT_INTELLIGENCE_KEY={cv['AZURE_DOCUMENT_INTELLIGENCE_KEY']}",
            "",
            "# Azure Storage Configuration",
            f"AZURE_STORAGE_ACCOUNT_URL={cv['AZURE_STORAGE_ACCOUNT_URL']}",
            f"AZURE_BLOB_SAS_TOKEN=\"{cv['AZURE_BLOB_SAS_TOKEN']}\"",
            f"BLOB_CONTAINER={cv['BLOB_CONTAINER']}",
            "",
            "# Processing Configuration",
            "MAX_FILE_SIZE=50",
            "REQUEST_TIMEOUT=300",
            "LOG_LEVEL=INFO",
            "ALLOWED_ORIGINS=*",
            "",
            "# RAG Specific Configuration",
            # Write concrete values (dotenv does not expand ${VAR} syntax).
            f"PG_HOST={cv['POSTGRES_HOST']}",
            f"PG_PORT={cv['POSTGRES_PORT']}",
            "PG_DATABASE=vectorsearch",
            f"PG_USER={cv['POSTGRES_USER']}",
            f"PG_PASSWORD={cv['POSTGRES_PASSWORD']}",
            "PG_SSL_MODE=require",
            "CHUNK_SIZE=1000",
            "CHUNK_OVERLAP=200",
            "MIN_CHUNK_SIZE=50",
        ]
        env_path.write_text("\n".join(lines) + "\n", encoding="utf-8")

        print("   ✅ .env file created successfully")
        return True

    except Exception as e:
        print(f"   ❌ Failed to create .env file: {e}")
        return False
214
+
215
def install_dependencies():
    """Install Python dependencies"""
    print_step("Installing Python dependencies...")

    # Write a default requirements.txt when none is present.
    requirements_path = Path("requirements.txt")
    if not requirements_path.exists():
        print("   Creating requirements.txt file...")

        requirements = [
            "fastapi>=0.104.1",
            "uvicorn[standard]>=0.24.0",
            "httpx>=0.25.0",
            "asyncpg>=0.29.0",
            "psutil>=5.9.0",
            "pydantic>=2.5.0",
            "python-dotenv>=1.0.0",
            "python-multipart>=0.0.6",
            "azure-ai-inference>=1.0.0",
            "azure-core>=1.29.0",
            "azure-storage-blob>=12.19.0",
            "azure-ai-documentintelligence>=1.0.0",
            "openai>=1.3.0",
            "beautifulsoup4>=4.12.0",
            "requests>=2.31.0",
            "numpy>=1.24.0",
            "Pillow>=10.0.0",
            "python-docx>=1.1.0",
            "lxml>=4.9.0",
            "aiofiles>=23.2.1",
        ]

        try:
            requirements_path.write_text("".join(f"{req}\n" for req in requirements))
            print("   ✅ requirements.txt created")
        except Exception as e:
            print(f"   ❌ Failed to create requirements.txt: {e}")
            return False

    try:
        print("   Installing dependencies (this may take a few minutes)...")
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
            capture_output=True,
            text=True,
        )
    except Exception as e:
        print(f"   ❌ Error installing dependencies: {e}")
        return False

    if result.returncode == 0:
        print("   ✅ Dependencies installed successfully")
        return True

    print(f"   ❌ Failed to install dependencies:")
    print(f"   {result.stderr}")
    return False
274
+
275
def validate_configuration():
    """Validate the configuration"""
    print_step("Validating configuration...")

    try:
        from configs import get_config, validate_environment

        config = get_config()
        config.print_configuration_summary()

        if validate_environment():
            print("\n   ✅ Configuration validation passed!")
            return True

        print("\n   ❌ Configuration validation failed!")
        print("   Please check your .env file and update missing values.")
        return False

    except ImportError as e:
        print(f"   ❌ Failed to import configuration module: {e}")
        return False
    except Exception as e:
        print(f"   ❌ Configuration validation error: {e}")
        return False
299
+
300
async def test_database_connection():
    """Test database connection.

    Opens a short-lived asyncpg connection using the NER service settings,
    runs a trivial `SELECT version()` query, and reports the outcome.

    Returns:
        bool: True when the connection and query succeed, False otherwise.
    """
    print_step("Testing database connection...")

    try:
        from configs import get_config
        import asyncpg

        config = get_config()

        # Test connection (10s timeout so an unreachable host fails fast).
        conn = await asyncpg.connect(
            host=config.ner.POSTGRES_HOST,
            port=config.ner.POSTGRES_PORT,
            database=config.ner.POSTGRES_DATABASE,
            user=config.ner.POSTGRES_USER,
            password=config.ner.POSTGRES_PASSWORD,
            ssl='require',
            timeout=10
        )

        # FIX: close the connection even when the test query raises, so a
        # failed query does not leak an open server connection (the original
        # skipped `conn.close()` on that path).
        try:
            version = await conn.fetchval("SELECT version()")
        finally:
            await conn.close()

        print(" βœ… Database connection successful")
        print(f" Database version: {version[:50]}...")
        return True

    except Exception as e:
        print(f" ❌ Database connection failed: {e}")
        print("\n πŸ’‘ Troubleshooting tips:")
        print(" 1. Check your PostgreSQL server is running")
        print(" 2. Verify host, port, username, and password")
        print(" 3. Ensure your IP is allowlisted in firewall rules")
        print(" 4. Check SSL configuration")
        return False
337
+
338
def create_startup_script():
    """Create startup script for easy service management.

    Writes start_services.bat on Windows or an executable start_services.sh
    elsewhere; both launch `python app.py` after a 3-second countdown.
    Always returns True.
    """
    print_step("Creating startup script...")

    # NOTE(review): the script literals below are reconstructed flush-left;
    # the original indentation inside the triple-quoted strings could not be
    # recovered exactly from the diff view — confirm against the repo file.
    # Create startup script for Windows
    if sys.platform == "win32":
        script_content = """@echo off
echo Starting Unified AI Services...
echo.

echo Starting in 3 seconds...
timeout /t 3 /nobreak >nul

echo Starting unified application...
python app.py

pause
"""
        with open("start_services.bat", "w") as f:
            f.write(script_content)
        print(" βœ… Created start_services.bat")

    # Create startup script for Unix/Linux/Mac
    else:
        script_content = """#!/bin/bash

echo "Starting Unified AI Services..."
echo

echo "Starting in 3 seconds..."
sleep 3

echo "Starting unified application..."
python app.py
"""
        with open("start_services.sh", "w") as f:
            f.write(script_content)

        # Make executable (rwxr-xr-x) so ./start_services.sh runs directly.
        os.chmod("start_services.sh", 0o755)
        print(" βœ… Created start_services.sh")

    return True
381
+
382
def create_test_script():
    """Create test script for easy testing.

    Writes run_tests.bat on Windows or an executable run_tests.sh elsewhere;
    both prompt the user, then run `python test_unified.py`. Always returns
    True.
    """
    print_step("Creating test script...")

    # NOTE(review): script literals reconstructed flush-left; original
    # in-string indentation not recoverable from the diff view.
    # Create test script for Windows
    if sys.platform == "win32":
        script_content = """@echo off
echo Running Unified System Tests...
echo.

echo Make sure the unified application is running first!
echo Press any key to continue or Ctrl+C to cancel...
pause >nul

echo Running comprehensive tests...
python test_unified.py

pause
"""
        with open("run_tests.bat", "w") as f:
            f.write(script_content)
        print(" βœ… Created run_tests.bat")

    # Create test script for Unix/Linux/Mac
    else:
        script_content = """#!/bin/bash

echo "Running Unified System Tests..."
echo

echo "Make sure the unified application is running first!"
read -p "Press Enter to continue or Ctrl+C to cancel..."

echo "Running comprehensive tests..."
python test_unified.py
"""
        with open("run_tests.sh", "w") as f:
            f.write(script_content)

        # Make executable (rwxr-xr-x) so ./run_tests.sh runs directly.
        os.chmod("run_tests.sh", 0o755)
        print(" βœ… Created run_tests.sh")

    return True
426
+
427
def main():
    """Run the interactive setup workflow for the unified AI services.

    Walks through environment checks, configuration, dependency install,
    validation, and helper-script generation, then prints a summary of any
    steps that failed. Each step function returns a bool; a False return or
    an exception marks the step as failed without aborting the run.
    """
    print_header("Unified AI Services - Automated Setup")

    print("This script will help you set up the Unified AI Services application.")
    print("It will:")
    print(" β€’ Check your Python environment")
    print(" β€’ Create necessary directories")
    print(" β€’ Check for required service files")
    print(" β€’ Set up configuration (.env file)")
    print(" β€’ Install Python dependencies")
    print(" β€’ Validate configuration")
    print(" β€’ Test database connection")
    print(" β€’ Create startup and test scripts")

    response = input("\nProceed with setup? (Y/n): ")
    # FIX: accept 'no' and surrounding whitespace, not just a bare 'n'
    # (previously " n" or "no" silently proceeded with setup).
    if response.strip().lower() in ("n", "no"):
        print("Setup cancelled.")
        return

    setup_steps = [
        ("Python Version Check", check_python_version),
        ("Directory Structure", create_directory_structure),
        ("Service Files Check", check_service_files),
        ("Environment Configuration", create_env_file),
        ("Dependencies Installation", install_dependencies),
        ("Configuration Validation", validate_configuration),
        ("Startup Scripts", create_startup_script),
        ("Test Scripts", create_test_script),
    ]

    failed_steps = []

    for step_name, step_func in setup_steps:
        try:
            if not step_func():
                failed_steps.append(step_name)
        except Exception as e:
            print(f" ❌ {step_name} failed with exception: {e}")
            failed_steps.append(step_name)

    # Database connection test (optional) — a failure here is informational
    # only and does not count against the setup summary.
    print_step("Testing database connection (optional)...")
    try:
        asyncio.run(test_database_connection())
    except Exception as e:
        print(f" ⚠️ Database test skipped: {e}")

    # Final summary
    print_header("Setup Summary")

    if not failed_steps:
        print("πŸŽ‰ Setup completed successfully!")
        print("\nNext steps:")
        print("1. Review the .env file and update any missing values")
        print("2. Start the unified application:")
        if sys.platform == "win32":
            print(" β€’ Double-click start_services.bat")
            print(" β€’ Or run: python app.py")
        else:
            print(" β€’ Run: ./start_services.sh")
            print(" β€’ Or run: python app.py")
        print("3. Test the system:")
        if sys.platform == "win32":
            print(" β€’ Double-click run_tests.bat")
            print(" β€’ Or run: python test_unified.py")
        else:
            print(" β€’ Run: ./run_tests.sh")
            print(" β€’ Or run: python test_unified.py")
        print("4. Access the API documentation at: http://localhost:8000/docs")

    else:
        print("⚠️ Setup completed with some issues:")
        for step in failed_steps:
            print(f" ❌ {step}")

        print("\nPlease resolve the failed steps before proceeding.")
        print("You may need to:")
        print("β€’ Check your internet connection for dependency installation")
        print("β€’ Verify your Azure service credentials")
        print("β€’ Ensure PostgreSQL is accessible")
        print("β€’ Check file permissions")

if __name__ == "__main__":
    main()
test.py ADDED
@@ -0,0 +1,1055 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Comprehensive Test Suite for Unified AI Services
4
+ Tests the unified application and all integrated services (NER, OCR, RAG)
5
+ Combines functionality from test_rag.py and test_ner.py with new unified tests
6
+ """
7
+
8
+ import asyncio
9
+ import httpx
10
+ import json
11
+ import io
12
+ import sys
13
+ import time
14
+ import tempfile
15
+ import os
16
+ from pathlib import Path
17
+ from typing import Dict, List, Any, Optional, Tuple
18
+ import uuid as python_uuid
19
+
20
# Import configuration — optional: the suite still runs with the defaults
# below when the project `configs` module is unavailable.
try:
    from configs import get_config, validate_environment
    config = get_config()
except ImportError:
    print("⚠️ Could not import configs. Using default values.")
    config = None

# Test configuration — default local ports for each service under test.
UNIFIED_URL = "http://localhost:8000" # Main unified app
NER_URL = "http://localhost:8500" # Direct NER service
OCR_URL = "http://localhost:8400" # Direct OCR service
RAG_URL = "http://localhost:8401" # Direct RAG service
TEST_TIMEOUT = 300  # per-request httpx timeout (seconds) — generous for slow model calls
34
+
35
+ # Test data (from original test files)
36
+ THAI_CYANIDE_MURDER_CASE = """
37
+ ΰΉ€ΰΈ«ΰΈ•ΰΈΈΰΈ†ΰΈ²ΰΈ•ΰΈΰΈ£ΰΈ£ΰΈ‘ΰΈ”ΰΉ‰ΰΈ§ΰΈ’ΰΉ„ΰΈ‹ΰΈ’ΰΈ²ΰΉ„ΰΈ™ΰΈ”ΰΉŒ พ.ΰΈ¨. 2566
38
+
39
+ ΰΈ„ΰΈ”ΰΈ΅ΰΈ†ΰΈ²ΰΈ•ΰΈΰΈ£ΰΈ£ΰΈ‘ΰΈ•ΰΉˆΰΈ­ΰΉ€ΰΈ™ΰΈ·ΰΉˆΰΈ­ΰΈ‡ΰΈ—ΰΈ΅ΰΉˆΰΈͺΰΈ±ΰΉˆΰΈ™ΰΈͺΰΈ°ΰΉ€ΰΈ—ΰΈ·ΰΈ­ΰΈ™ΰΈͺΰΈ±ΰΈ‡ΰΈ„ΰΈ‘ΰΉ„ΰΈ—ΰΈ’ ΰΉ€ΰΈΰΈ΄ΰΈ”ΰΈ‚ΰΈΆΰΉ‰ΰΈ™ΰΈ£ΰΈ°ΰΈ«ΰΈ§ΰΉˆΰΈ²ΰΈ‡ΰΉ€ΰΈ”ΰΈ·ΰΈ­ΰΈ™ΰΉ€ΰΈ‘ΰΈ©ΰΈ²ΰΈ’ΰΈ™-ΰΈ•ΰΈΈΰΈ₯ΰΈ²ΰΈ„ΰΈ‘ พ.ΰΈ¨. 2566
40
+ ΰΉ‚ΰΈ”ΰΈ’ΰΈ‘ΰΈ΅ΰΈ™ΰΈ²ΰΈ‡ΰΈͺΰΈ²ΰΈ§ΰΈͺΰΈ²ΰΈ£ΰΈ΄ΰΈ“ΰΈ΅ ΰΈŠΰΈ±ΰΈ’ΰΈ§ΰΈ±ΰΈ’ΰΈ™ΰΉŒ ΰΈ«ΰΈ£ΰΈ·ΰΈ­ "แอฑ ΰΉ„ΰΈ‹ΰΈ’ΰΈ²ΰΉ„ΰΈ™ΰΈ”ΰΉŒ" ΰΈ­ΰΈ²ΰΈ’ΰΈΈ 36 ΰΈ›ΰΈ΅ ΰΉ€ΰΈ›ΰΉ‡ΰΈ™ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈ«ΰΈ²
41
+
42
+ ΰΈ£ΰΈ²ΰΈ’ΰΈ₯ΰΈ°ΰΉ€ΰΈ­ΰΈ΅ΰΈ’ΰΈ”ΰΈ„ΰΈ”ΰΈ΅:
43
+ ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈ«ΰΈ²ΰΉ„ΰΈ”ΰΉ‰ΰΈ—ΰΈ³ΰΈΰΈ²ΰΈ£ΰΈ§ΰΈ²ΰΈ‡ΰΈ’ΰΈ²ΰΈžΰΈ΄ΰΈ©ΰΉ„ΰΈ‹ΰΈ’ΰΈ²ΰΉ„ΰΈ™ΰΈ”ΰΉŒ (Potassium Cyanide) ในอาหารแΰΈ₯ΰΈ°ΰΉ€ΰΈ„ΰΈ£ΰΈ·ΰΉˆΰΈ­ΰΈ‡ΰΈ”ΰΈ·ΰΉˆΰΈ‘ΰΈ‚ΰΈ­ΰΈ‡ΰΉ€ΰΈ«ΰΈ’ΰΈ·ΰΉˆΰΈ­ΰΈ«ΰΈ₯ΰΈ²ΰΈ’ΰΈ£ΰΈ²ΰΈ’
44
+ ΰΉ€ΰΈ«ΰΈ’ΰΈ·ΰΉˆΰΈ­ΰΈ£ΰΈ²ΰΈ’ΰΉΰΈ£ΰΈΰΈ„ΰΈ·ΰΈ­ ΰΈ™ΰΈ²ΰΈ‡ΰΈͺิริพร บุญΰΈ₯ΰΈ²ΰΈ ΰΈ§ΰΈ™ΰΈ΄ΰΈŠ ΰΈ­ΰΈ²ΰΈ’ΰΈΈ 32 ΰΈ›ΰΈ΅ ΰΉ€ΰΈͺΰΈ΅ΰΈ’ΰΈŠΰΈ΅ΰΈ§ΰΈ΄ΰΈ•ΰΉ€ΰΈ‘ΰΈ·ΰΉˆΰΈ­ΰΈ§ΰΈ±ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ 14 ΰΉ€ΰΈ‘ΰΈ©ΰΈ²ΰΈ’ΰΈ™ 2566 ΰΈ—ΰΈ΅ΰΉˆΰΈˆΰΈ±ΰΈ‡ΰΈ«ΰΈ§ΰΈ±ΰΈ”ΰΈΰΈ²ΰΈΰΈˆΰΈ™ΰΈšΰΈΈΰΈ£ΰΈ΅
45
+ ΰΉ€ΰΈ«ΰΈ’ΰΈ·ΰΉˆΰΈ­ΰΈ£ΰΈ²ΰΈ’ΰΈ—ΰΈ΅ΰΉˆΰΈͺΰΈ­ΰΈ‡ ΰΈ™ΰΈ²ΰΈ’ΰΈͺุรชัฒ ΰΈ­ΰΈ’ΰΈΉΰΉˆΰΈ„ΰΈ‡ΰΈ„ΰΈ₯ΰΈ±ΰΈ‡ ΰΈ­ΰΈ²ΰΈ’ΰΈΈ 45 ΰΈ›ΰΈ΅ ΰΉ€ΰΈͺΰΈ΅ΰΈ’ΰΈŠΰΈ΅ΰΈ§ΰΈ΄ΰΈ•ΰΉ€ΰΈ‘ΰΈ·ΰΉˆΰΈ­ΰΈ§ΰΈ±ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ 2 ΰΈžΰΈ€ΰΈ©ΰΈ ΰΈ²ΰΈ„ΰΈ‘ 2566 ΰΈ—ΰΈ΅ΰΉˆΰΈˆΰΈ±ΰΈ‡ΰΈ«ΰΈ§ΰΈ±ΰΈ”ΰΈ£ΰΈ²ΰΈŠΰΈšΰΈΈΰΈ£ΰΈ΅
46
+
47
+ การΰΈͺืบΰΈͺΰΈ§ΰΈ™:
48
+ ΰΈ•ΰΈ³ΰΈ£ΰΈ§ΰΈˆΰΈ ΰΈΉΰΈ˜ΰΈ£ΰΈ ΰΈ²ΰΈ„ 7 ร่วฑกับ ΰΈͺΰΈ³ΰΈ™ΰΈ±ΰΈΰΈ‡ΰΈ²ΰΈ™ΰΈ•ΰΈ³ΰΈ£ΰΈ§ΰΈˆΰΉΰΈ«ΰΉˆΰΈ‡ΰΈŠΰΈ²ΰΈ•ΰΈ΄ ทำการΰΈͺืบΰΈͺΰΈ§ΰΈ™
49
+ พบหΰΈ₯ΰΈ±ΰΈΰΈΰΈ²ΰΈ™ΰΈˆΰΈ²ΰΈΰΈΰΈ₯ΰΉ‰ΰΈ­ΰΈ‡ΰΈ§ΰΈ‡ΰΈˆΰΈ£ΰΈ›ΰΈ΄ΰΈ” (CCTV) ΰΉƒΰΈ™ΰΈ«ΰΈ₯ΰΈ²ΰΈ’ΰΈžΰΈ·ΰΉ‰ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ
50
+ ΰΈ•ΰΈ£ΰΈ§ΰΈˆΰΈžΰΈšΰΈͺΰΈ²ΰΈ£ΰΉ„ΰΈ‹ΰΈ’ΰΈ²ΰΉ„ΰΈ™ΰΈ”ΰΉŒΰΉƒΰΈ™ΰΈ£ΰΉˆΰΈ²ΰΈ‡ΰΈΰΈ²ΰΈ’ΰΉ€ΰΈ«ΰΈ’ΰΈ·ΰΉˆΰΈ­ΰΈ—ΰΈΈΰΈΰΈ£ΰΈ²ΰΈ’
51
+
52
+ การจับกุฑ:
53
+ ΰΈ§ΰΈ±ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ 3 ΰΈ•ΰΈΈΰΈ₯ΰΈ²ΰΈ„ΰΈ‘ 2566 ΰΈ•ΰΈ³ΰΈ£ΰΈ§ΰΈˆΰΈˆΰΈ±ΰΈšΰΈΰΈΈΰΈ‘ΰΈ•ΰΈ±ΰΈ§ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈ«ΰΈ²ΰΉ„ΰΈ”ΰΉ‰ΰΈ—ΰΈ΅ΰΉˆΰΉ‚ΰΈ£ΰΈ‡ΰΉΰΈ£ΰΈ‘ΰΉ€ΰΈ”ΰΈ­ΰΈ° ΰΈšΰΈ²ΰΈ’ΰΉΰΈ‹ΰΈ” ΰΈ•ΰΈ±ΰΉ‰ΰΈ‡ΰΈ­ΰΈ’ΰΈΉΰΉˆΰΈ—ΰΈ΅ΰΉˆ ถนนราฑคำแหง ΰΈΰΈ£ΰΈΈΰΈ‡ΰΉ€ΰΈ—ΰΈžΰΈ‘ΰΈ«ΰΈ²ΰΈ™ΰΈ„ΰΈ£
54
+ ΰΈžΰΈšΰΉ€ΰΈ­ΰΈΰΈͺΰΈ²ΰΈ£ΰΈ›ΰΈ₯ΰΈ­ΰΈ‘ ΰΈšΰΈ±ΰΈ•ΰΈ£ΰΈ›ΰΈ£ΰΈ°ΰΈŠΰΈ²ΰΈŠΰΈ™ΰΈ›ΰΈ₯ΰΈ­ΰΈ‘ แΰΈ₯ΰΈ°ΰΈ§ΰΈ±ΰΈ•ΰΈ–ΰΈΈΰΈžΰΈ’ΰΈ²ΰΈ™ΰΈͺΰΈ³ΰΈ„ΰΈ±ΰΈΰΈ­ΰΈ·ΰΉˆΰΈ™ΰΉ†
55
+ ΰΈ’ΰΈΆΰΈ”ΰΈ—ΰΈ£ΰΈ±ΰΈžΰΈ’ΰΉŒΰΈͺΰΈ΄ΰΈ™ΰΈ—ΰΈ΅ΰΉˆΰΉ„ΰΈ”ΰΉ‰ΰΈˆΰΈ²ΰΈΰΈΰΈ²ΰΈ£ΰΈΰΈ£ΰΈ°ΰΈ—ΰΈ³ΰΈœΰΈ΄ΰΈ” ΰΈ‘ΰΈΉΰΈ₯ΰΈ„ΰΉˆΰΈ²ΰΈ£ΰΈ§ΰΈ‘ΰΈΰΈ§ΰΉˆΰΈ² 2 ΰΈ₯ΰΉ‰ΰΈ²ΰΈ™ΰΈšΰΈ²ΰΈ—
56
+ """
57
+
58
+ ENGLISH_CYBERSECURITY_CASE = """
59
+ Major Cybersecurity Incident Report - Operation Digital Shield
60
+
61
+ Incident Overview:
62
+ On October 15, 2024, CyberDefense Corp, a leading cybersecurity firm headquartered in Austin, Texas, detected a sophisticated Advanced Persistent Threat (APT) targeting critical infrastructure across Southeast Asia.
63
+
64
+ Key Personnel:
65
+ - Dr. Sarah Chen, Chief Security Officer at CyberDefense Corp
66
+ - Agent Michael Rodriguez, FBI Cyber Division
67
+ - Captain Lisa Thompson, US Cyber Command
68
+
69
+ Technical Details:
70
+ The attackers used a custom malware strain called "DeepStrike" developed by the Shadow Dragon group
71
+ Primary attack vector: spear-phishing emails containing weaponized PDF documents
72
+ Estimated financial damage: $50 million USD across affected organizations
73
+ """
74
+
75
# Public echo endpoints used for URL-ingestion tests (stable, small payloads).
TEST_URLS = [
    "https://httpbin.org/html",
    "https://httpbin.org/json"
]
79
+
80
class TestResult:
    """Accumulates pass/fail/warning outcomes for a test run.

    Each recorded result is echoed to stdout immediately and retained for
    the final summary printed by `print_summary`.
    """

    def __init__(self):
        self.total_tests = 0
        self.passed_tests = 0
        self.failed_tests = 0
        self.test_results = []
        self.warnings = []

    def add_result(self, test_name: str, passed: bool, message: str = "", details: Dict = None):
        """Record one test outcome, update counters, and echo it."""
        self.total_tests += 1
        if passed:
            self.passed_tests += 1
            print(f"βœ… {test_name}")
            if message:
                print(f" {message}")
        else:
            self.failed_tests += 1
            print(f"❌ {test_name}: {message}")

        entry = {
            'test_name': test_name,
            'passed': passed,
            'message': message,
            'details': details or {},
        }
        self.test_results.append(entry)

    def add_warning(self, test_name: str, message: str):
        """Record a warning (doesn't count toward pass/fail totals)."""
        print(f"⚠️ {test_name}: {message}")
        self.warnings.append({
            'test_name': test_name,
            'message': message,
        })

    def print_summary(self):
        """Print the aggregate summary plus failed tests and warnings."""
        banner = "=" * 60
        print("\n" + banner)
        print("UNIFIED SYSTEM TEST SUMMARY")
        print(banner)
        print(f"Total Tests: {self.total_tests}")
        print(f"Passed: {self.passed_tests}")
        print(f"Failed: {self.failed_tests}")
        print(f"Warnings: {len(self.warnings)}")
        # Guard against division by zero when nothing was run.
        if self.total_tests > 0:
            print(f"Success Rate: {(self.passed_tests/self.total_tests*100):.1f}%")
        else:
            print("0%")

        if self.failed_tests > 0:
            print(f"\n❌ FAILED TESTS:")
            for failed in (r for r in self.test_results if not r['passed']):
                print(f" - {failed['test_name']}: {failed['message']}")

        if self.warnings:
            print(f"\n⚠️ WARNINGS:")
            for item in self.warnings:
                print(f" - {item['test_name']}: {item['message']}")
136
+ print(f" - {warning['test_name']}: {warning['message']}")
137
+
138
+ class UnifiedSystemTester:
139
+ """Main test class for unified system"""
140
+
141
+ def __init__(self):
142
+ self.result = TestResult()
143
+ self.session = None
144
+ self.created_documents = [] # Track for cleanup
145
+ self.created_analyses = [] # Track for cleanup
146
+
147
+ async def __aenter__(self):
148
+ self.session = httpx.AsyncClient(timeout=TEST_TIMEOUT)
149
+ return self
150
+
151
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
152
+ if self.session:
153
+ await self.session.aclose()
154
+
155
+ async def make_request(self, method: str, url: str, **kwargs) -> httpx.Response:
156
+ """Make HTTP request with error handling"""
157
+ try:
158
+ response = await self.session.request(method, url, **kwargs)
159
+ return response
160
+ except httpx.RequestError as e:
161
+ raise Exception(f"Request failed: {e}")
162
+
163
    async def test_unified_app_health(self):
        """Test 1: Unified Application Health Check.

        Calls GET /health on the unified app and records a pass when the
        reported status is "healthy" or "degraded" with at least one healthy
        sub-service. Returns True on pass, False otherwise.
        """
        print("πŸ” Test 1: Unified Application Health Check")
        try:
            response = await self.make_request('GET', f"{UNIFIED_URL}/health")

            if response.status_code == 200:
                data = response.json()
                status = data.get("status")
                services = data.get("services", [])

                healthy_services = [s for s in services if s.get("health")]
                total_services = len(services)

                # "degraded" still counts as a pass as long as something is up.
                if status in ["healthy", "degraded"] and healthy_services:
                    message = f"Status: {status}, Services: {len(healthy_services)}/{total_services} healthy"
                    for service in services:
                        service_status = "βœ…" if service.get("health") else "❌"
                        message += f"\n {service_status} {service.get('name')}: {service.get('status')} ({service.get('response_time', 0):.3f}s)"

                    self.result.add_result(
                        "Unified App Health Check",
                        True,
                        message,
                        data
                    )
                    return True
                else:
                    self.result.add_result(
                        "Unified App Health Check",
                        False,
                        f"System unhealthy: {data}"
                    )
                    return False
            else:
                self.result.add_result(
                    "Unified App Health Check",
                    False,
                    f"HTTP {response.status_code}: {response.text}"
                )
                return False
        except Exception as e:
            # Provide detailed diagnostics for connection failures
            if "connection" in str(e).lower():
                print(f"\nπŸ” Connection Diagnostics:")
                print(f" Unified App URL: {UNIFIED_URL}")
                print(f" Error: {e}")
                print(f"\nπŸ’‘ Possible Issues:")
                print(f" 1. Unified app is not running")
                print(f" 2. Wrong host/port in configuration")
                print(f" 3. Services failed to start")
                print(f"\nπŸš€ To Start Unified App:")
                print(f" python app.py")

            self.result.add_result(
                "Unified App Health Check",
                False,
                str(e)
            )
            return False
223
+
224
    async def test_individual_service_health(self):
        """Test 2: Individual Service Health Checks.

        Probes each backing service's /health endpoint directly (bypassing
        the unified app) and records one aggregate result. Returns True only
        when every service responds 200.
        """
        print("πŸ” Test 2: Individual Service Health Checks")

        services = [
            ("NER", NER_URL),
            ("OCR", OCR_URL),
            ("RAG", RAG_URL)
        ]

        all_healthy = True
        service_statuses = {}

        for service_name, service_url in services:
            try:
                response = await self.make_request('GET', f"{service_url}/health")

                if response.status_code == 200:
                    data = response.json()
                    status = data.get("status", "unknown")
                    service_statuses[service_name] = {
                        "healthy": True,
                        "status": status,
                        "details": data
                    }
                    print(f" βœ… {service_name}: {status}")
                else:
                    service_statuses[service_name] = {
                        "healthy": False,
                        "status": f"HTTP {response.status_code}",
                        "details": None
                    }
                    print(f" ❌ {service_name}: HTTP {response.status_code}")
                    all_healthy = False

            except Exception as e:
                # A connection error for one service does not abort the loop.
                service_statuses[service_name] = {
                    "healthy": False,
                    "status": f"Error: {e}",
                    "details": None
                }
                print(f" ❌ {service_name}: {e}")
                all_healthy = False

        self.result.add_result(
            "Individual Service Health",
            all_healthy,
            f"Services healthy: {sum(1 for s in service_statuses.values() if s['healthy'])}/{len(services)}",
            service_statuses
        )

        return all_healthy
276
+
277
    async def test_unified_analysis_text(self):
        """Test 3: Unified Analysis with Text.

        Sends the Thai fixture text through POST /analyze/unified with RAG
        indexing enabled, then verifies the expected service calls happened
        and entities were extracted. Returns the response payload on
        success, None on failure. Created analysis/document IDs are tracked
        for later cleanup.
        """
        print("πŸ” Test 3: Unified Analysis with Text")

        try:
            request_data = {
                "text": THAI_CYANIDE_MURDER_CASE,
                "extract_relationships": True,
                "include_embeddings": False,
                "include_summary": True,
                "generate_graph_files": True,
                "export_formats": ["neo4j", "json"],
                "enable_rag_indexing": True,
                "rag_title": "Cyanide Murder Case Analysis",
                "rag_keywords": ["cyanide", "murder", "investigation", "thai"],
                "rag_metadata": {"test": True, "case_type": "criminal"}
            }

            response = await self.make_request('POST', f"{UNIFIED_URL}/analyze/unified", json=request_data)

            if response.status_code == 200:
                data = response.json()
                if data.get("success"):
                    service_calls = data.get("service_calls", [])
                    ner_analysis = data.get("ner_analysis", {})
                    rag_document = data.get("rag_document", {})
                    processing_time = data.get("processing_time", 0)

                    # Validate NER analysis
                    entities = ner_analysis.get("entities", [])
                    relationships = ner_analysis.get("relationships", [])

                    # Track analysis for cleanup
                    if ner_analysis.get("analysis_id"):
                        self.created_analyses.append(ner_analysis["analysis_id"])
                    if rag_document and rag_document.get("document_id"):
                        self.created_documents.append(rag_document["document_id"])

                    message = f"Service calls: {', '.join(service_calls)}"
                    message += f"\n Processing time: {processing_time:.2f}s"
                    message += f"\n NER entities: {len(entities)}"
                    message += f"\n NER relationships: {len(relationships)}"
                    if rag_document:
                        message += f"\n RAG document ID: {rag_document.get('document_id', 'N/A')}"
                        message += f"\n RAG chunks: {rag_document.get('total_chunks', 0)}"

                    # Check if we got expected service calls — rag_upload is
                    # only expected because the request enabled RAG indexing.
                    expected_calls = ["ner_text"]
                    if "enable_rag_indexing" in request_data and request_data["enable_rag_indexing"]:
                        expected_calls.append("rag_upload")

                    all_expected_calls = all(call in service_calls for call in expected_calls)

                    self.result.add_result(
                        "Unified Analysis (Text)",
                        all_expected_calls and entities and len(service_calls) > 0,
                        message,
                        data
                    )
                    return data
                else:
                    self.result.add_result(
                        "Unified Analysis (Text)",
                        False,
                        data.get("error", "Analysis failed")
                    )
                    return None
            else:
                # Truncate body so a large HTML error page doesn't flood logs.
                self.result.add_result(
                    "Unified Analysis (Text)",
                    False,
                    f"HTTP {response.status_code}: {response.text[:200]}"
                )
                return None
        except Exception as e:
            self.result.add_result(
                "Unified Analysis (Text)",
                False,
                str(e)
            )
            return None
358
+
359
    async def test_unified_analysis_url(self):
        """Test 4: Unified Analysis with URL.

        Feeds a public httpbin URL through POST /analyze/unified and checks
        the "ner_url" service call was made. Returns the response payload on
        success, None on failure.
        """
        print("πŸ” Test 4: Unified Analysis with URL")

        try:
            request_data = {
                "url": "https://httpbin.org/html",
                "extract_relationships": True,
                "include_embeddings": False,
                "include_summary": True,
                "generate_graph_files": False,
                "export_formats": ["json"],
                "enable_rag_indexing": True,
                "rag_title": "Test URL Document",
                "rag_keywords": ["test", "url", "httpbin"],
                "rag_metadata": {"test": True, "source": "httpbin"}
            }

            response = await self.make_request('POST', f"{UNIFIED_URL}/analyze/unified", json=request_data)

            if response.status_code == 200:
                data = response.json()
                if data.get("success"):
                    service_calls = data.get("service_calls", [])
                    ner_analysis = data.get("ner_analysis", {})
                    rag_document = data.get("rag_document", {})

                    # Track for cleanup
                    if ner_analysis.get("analysis_id"):
                        self.created_analyses.append(ner_analysis["analysis_id"])
                    if rag_document and rag_document.get("document_id"):
                        self.created_documents.append(rag_document["document_id"])

                    message = f"Service calls: {', '.join(service_calls)}"
                    message += f"\n NER analysis ID: {ner_analysis.get('analysis_id', 'N/A')}"
                    if rag_document:
                        message += f"\n RAG document ID: {rag_document.get('document_id', 'N/A')}"

                    # Check for expected service calls
                    has_ner_url = "ner_url" in service_calls
                    # NOTE(review): has_rag_url is computed but never used in
                    # the pass/fail condition — confirm whether it should be.
                    has_rag_url = "rag_url" in service_calls

                    self.result.add_result(
                        "Unified Analysis (URL)",
                        has_ner_url and len(service_calls) > 0,
                        message,
                        data
                    )
                    return data
                else:
                    self.result.add_result(
                        "Unified Analysis (URL)",
                        False,
                        data.get("error", "URL analysis failed")
                    )
                    return None
            else:
                self.result.add_result(
                    "Unified Analysis (URL)",
                    False,
                    f"HTTP {response.status_code}: {response.text[:200]}"
                )
                return None
        except Exception as e:
            self.result.add_result(
                "Unified Analysis (URL)",
                False,
                str(e)
            )
            return None
429
+
430
    async def test_combined_search(self):
        """Test 5: Combined Search with NER Analysis.

        Queries POST /search/combined for content indexed by the earlier
        tests and verifies the RAG search service was invoked. Zero results
        are only a warning (indexing may lag). Returns the response payload
        on success, None on failure.
        """
        print("πŸ” Test 5: Combined Search with NER Analysis")

        # Wait a moment for indexing to complete
        await asyncio.sleep(2)

        try:
            request_data = {
                "query": "investigation murder case",
                "limit": 5,
                "similarity_threshold": 0.1, # Lower threshold for better results
                "include_ner_analysis": True,
                "ner_export_formats": ["json"]
            }

            response = await self.make_request('POST', f"{UNIFIED_URL}/search/combined", json=request_data)

            if response.status_code == 200:
                data = response.json()
                if data.get("success"):
                    service_calls = data.get("service_calls", [])
                    search_results = data.get("search_results", {})
                    results = search_results.get("results", [])
                    ner_analyses = search_results.get("ner_analyses", [])

                    message = f"Service calls: {', '.join(service_calls)}"
                    message += f"\n Search results: {len(results)}"
                    message += f"\n NER analyses: {len(ner_analyses)}"
                    message += f"\n Processing time: {data.get('processing_time', 0):.2f}s"

                    # Check for expected service calls
                    has_rag_search = "rag_search" in service_calls
                    # NOTE(review): has_ner_analysis is computed but unused in
                    # the success condition — confirm whether it should be.
                    has_ner_analysis = any("ner_text_" in call for call in service_calls)

                    success = has_rag_search and len(service_calls) > 0
                    if len(results) == 0:
                        self.result.add_warning(
                            "Combined Search",
                            "No search results found - may need more indexed content"
                        )

                    self.result.add_result(
                        "Combined Search",
                        success,
                        message,
                        data
                    )
                    return data
                else:
                    self.result.add_result(
                        "Combined Search",
                        False,
                        data.get("error", "Search failed")
                    )
                    return None
            else:
                self.result.add_result(
                    "Combined Search",
                    False,
                    f"HTTP {response.status_code}: {response.text[:200]}"
                )
                return None
        except Exception as e:
            self.result.add_result(
                "Combined Search",
                False,
                str(e)
            )
            return None
500
+
501
    async def test_service_proxies(self):
        """Test 6: Service Proxy Endpoints.

        Exercises the unified app's /ner, /ocr, and /rag proxy routes and
        records one aggregate result. The aggregate passes only when every
        proxy works, but the method returns True if at least one did.
        """
        print("πŸ” Test 6: Service Proxy Endpoints")

        proxy_tests = []  # (name, passed, message) per proxy

        # Test NER proxy
        try:
            ner_data = {
                "text": "Test entity recognition with John Smith working at Microsoft in Seattle.",
                "extract_relationships": True,
                "include_embeddings": False,
                "generate_graph_files": False
            }

            response = await self.make_request('POST', f"{UNIFIED_URL}/ner/analyze/text", json=ner_data)

            if response.status_code == 200:
                result = response.json()
                if result.get("success"):
                    entities = result.get("entities", [])
                    proxy_tests.append(("NER Proxy", True, f"Found {len(entities)} entities"))

                    # Track for cleanup
                    if result.get("analysis_id"):
                        self.created_analyses.append(result["analysis_id"])
                else:
                    proxy_tests.append(("NER Proxy", False, "Analysis failed"))
            else:
                proxy_tests.append(("NER Proxy", False, f"HTTP {response.status_code}"))
        except Exception as e:
            proxy_tests.append(("NER Proxy", False, str(e)))

        # Test OCR proxy
        try:
            response = await self.make_request('GET', f"{UNIFIED_URL}/ocr/health")

            if response.status_code == 200:
                proxy_tests.append(("OCR Proxy", True, "Health check passed"))
            else:
                proxy_tests.append(("OCR Proxy", False, f"HTTP {response.status_code}"))
        except Exception as e:
            proxy_tests.append(("OCR Proxy", False, str(e)))

        # Test RAG proxy
        try:
            response = await self.make_request('GET', f"{UNIFIED_URL}/rag/documents?limit=5")

            if response.status_code == 200:
                result = response.json()
                documents = result.get("documents", [])
                proxy_tests.append(("RAG Proxy", True, f"Found {len(documents)} documents"))
            else:
                proxy_tests.append(("RAG Proxy", False, f"HTTP {response.status_code}"))
        except Exception as e:
            proxy_tests.append(("RAG Proxy", False, str(e)))

        # Evaluate proxy tests
        passed_proxies = sum(1 for _, passed, _ in proxy_tests if passed)
        total_proxies = len(proxy_tests)

        for test_name, passed, message in proxy_tests:
            print(f" {'βœ…' if passed else '❌'} {test_name}: {message}")

        self.result.add_result(
            "Service Proxies",
            passed_proxies == total_proxies,
            f"Proxies working: {passed_proxies}/{total_proxies}",
            {"proxy_results": proxy_tests}
        )

        return passed_proxies > 0
573
+
574
    async def test_file_upload_unified(self):
        """Test 7: File Upload through Unified Interface.

        Uploads a synthetic plain-text report through the NER proxy's file
        endpoint and checks entities were extracted with an analysis_id.
        Returns the response payload on success, None on failure.
        """
        print("πŸ” Test 7: File Upload through Unified Interface")

        try:
            # Create test document — fixture text with known people,
            # organizations, dates, and money amounts for NER to find.
            test_content = """
        Technical Report: Advanced AI Systems

        This report examines the integration of Named Entity Recognition (NER),
        Optical Character Recognition (OCR), and Retrieval-Augmented Generation (RAG)
        systems in a unified architecture.

        Key Personnel:
        - Dr. Alice Johnson, Lead AI Researcher at TechCorp
        - Prof. Bob Smith, University of Technology
        - Sarah Wilson, Data Scientist

        Technical Components:
        - Azure OpenAI for embeddings and language processing
        - PostgreSQL with vector extensions for data storage
        - FastAPI for microservice architecture

        The system processes documents through multiple stages:
        1. OCR extraction for scanned documents
        2. NER analysis for entity and relationship extraction
        3. RAG indexing for searchable knowledge base

        Testing conducted on October 15, 2024 showed 95% accuracy.
        Total budget: $250,000 for the complete implementation.
        """

            # Test through NER proxy (file upload) — multipart form, so the
            # boolean options are sent as strings.
            file_content = test_content.encode('utf-8')
            files = {"file": ("test_report.txt", io.BytesIO(file_content), "text/plain")}
            data = {
                "extract_relationships": "true",
                "include_embeddings": "false",
                "include_summary": "true",
                "generate_graph_files": "true",
                "export_formats": "neo4j,json"
            }

            response = await self.make_request(
                'POST',
                f"{UNIFIED_URL}/ner/analyze/file",
                files=files,
                data=data
            )

            if response.status_code == 200:
                result = response.json()
                if result.get("success"):
                    entities = result.get("entities", [])
                    relationships = result.get("relationships", [])

                    # Track for cleanup
                    if result.get("analysis_id"):
                        self.created_analyses.append(result["analysis_id"])

                    message = f"File processed successfully"
                    message += f"\n Entities: {len(entities)}"
                    message += f"\n Relationships: {len(relationships)}"
                    message += f"\n Language: {result.get('language', 'unknown')}"

                    # Look for expected entities
                    person_entities = [e for e in entities if e.get('label') == 'PERSON']
                    org_entities = [e for e in entities if e.get('label') == 'ORGANIZATION']
                    money_entities = [e for e in entities if e.get('label') == 'MONEY']

                    message += f"\n People found: {len(person_entities)}"
                    message += f"\n Organizations found: {len(org_entities)}"
                    message += f"\n Money amounts found: {len(money_entities)}"

                    success = len(entities) > 0 and result.get("analysis_id")

                    self.result.add_result(
                        "File Upload (Unified)",
                        success,
                        message,
                        result
                    )
                    return result
                else:
                    self.result.add_result(
                        "File Upload (Unified)",
                        False,
                        result.get("error", "File analysis failed")
                    )
                    return None
            else:
                self.result.add_result(
                    "File Upload (Unified)",
                    False,
                    f"HTTP {response.status_code}: {response.text[:200]}"
                )
                return None
        except Exception as e:
            self.result.add_result(
                "File Upload (Unified)",
                False,
                str(e)
            )
            return None
678
+
679
+ async def test_service_discovery(self):
680
+ """Test 8: Service Discovery and Listing"""
681
+ print("πŸ” Test 8: Service Discovery and Listing")
682
+
683
+ try:
684
+ response = await self.make_request('GET', f"{UNIFIED_URL}/services")
685
+
686
+ if response.status_code == 200:
687
+ data = response.json()
688
+ services = data.get("services", {})
689
+ unified = data.get("unified", {})
690
+
691
+ expected_services = ["ner", "ocr", "rag"]
692
+ found_services = list(services.keys())
693
+
694
+ message = f"Services discovered: {', '.join(found_services)}"
695
+ message += f"\n Unified endpoint: {unified.get('url', 'N/A')}"
696
+
697
+ for service_name, service_info in services.items():
698
+ endpoints = service_info.get("endpoints", [])
699
+ message += f"\n {service_name}: {len(endpoints)} endpoints"
700
+
701
+ all_expected_found = all(service in found_services for service in expected_services)
702
+
703
+ self.result.add_result(
704
+ "Service Discovery",
705
+ all_expected_found,
706
+ message,
707
+ data
708
+ )
709
+ return data
710
+ else:
711
+ self.result.add_result(
712
+ "Service Discovery",
713
+ False,
714
+ f"HTTP {response.status_code}"
715
+ )
716
+ return None
717
+ except Exception as e:
718
+ self.result.add_result(
719
+ "Service Discovery",
720
+ False,
721
+ str(e)
722
+ )
723
+ return None
724
+
725
+ async def test_system_performance(self):
726
+ """Test 9: System Performance and Reliability"""
727
+ print("πŸ” Test 9: System Performance and Reliability")
728
+
729
+ try:
730
+ # Test multiple concurrent requests
731
+ tasks = []
732
+ test_texts = [
733
+ "Performance test with Apple Inc and CEO Tim Cook in California.",
734
+ "Reliability testing of Microsoft Azure services in Seattle.",
735
+ "Load testing with Google Cloud Platform and AI systems."
736
+ ]
737
+
738
+ start_time = time.time()
739
+
740
+ for i, text in enumerate(test_texts):
741
+ task = self.make_request(
742
+ 'POST',
743
+ f"{UNIFIED_URL}/ner/analyze/text",
744
+ json={
745
+ "text": text,
746
+ "extract_relationships": True,
747
+ "include_embeddings": False,
748
+ "generate_graph_files": False
749
+ }
750
+ )
751
+ tasks.append(task)
752
+
753
+ # Execute concurrent requests
754
+ responses = await asyncio.gather(*tasks, return_exceptions=True)
755
+ total_time = time.time() - start_time
756
+
757
+ # Analyze results
758
+ successful_requests = 0
759
+ total_entities = 0
760
+
761
+ for i, response in enumerate(responses):
762
+ if isinstance(response, Exception):
763
+ continue
764
+
765
+ if response.status_code == 200:
766
+ result = response.json()
767
+ if result.get("success"):
768
+ successful_requests += 1
769
+ entities = result.get("entities", [])
770
+ total_entities += len(entities)
771
+
772
+ # Track for cleanup
773
+ if result.get("analysis_id"):
774
+ self.created_analyses.append(result["analysis_id"])
775
+
776
+ avg_time_per_request = total_time / len(test_texts)
777
+
778
+ message = f"Concurrent requests: {successful_requests}/{len(test_texts)} successful"
779
+ message += f"\n Total time: {total_time:.2f}s"
780
+ message += f"\n Avg time per request: {avg_time_per_request:.2f}s"
781
+ message += f"\n Total entities found: {total_entities}"
782
+
783
+ # Performance criteria
784
+ performance_ok = (
785
+ successful_requests >= len(test_texts) * 0.8 and # 80% success rate
786
+ avg_time_per_request < 10.0 # Under 10 seconds per request
787
+ )
788
+
789
+ self.result.add_result(
790
+ "System Performance",
791
+ performance_ok,
792
+ message,
793
+ {
794
+ "successful_requests": successful_requests,
795
+ "total_requests": len(test_texts),
796
+ "total_time": total_time,
797
+ "avg_time_per_request": avg_time_per_request,
798
+ "total_entities": total_entities
799
+ }
800
+ )
801
+
802
+ return performance_ok
803
+
804
+ except Exception as e:
805
+ self.result.add_result(
806
+ "System Performance",
807
+ False,
808
+ str(e)
809
+ )
810
+ return False
811
+
812
+ async def test_error_handling(self):
813
+ """Test 10: Error Handling and Resilience"""
814
+ print("πŸ” Test 10: Error Handling and Resilience")
815
+
816
+ error_tests = []
817
+
818
+ # Test 1: Invalid unified analysis request
819
+ try:
820
+ response = await self.make_request(
821
+ 'POST',
822
+ f"{UNIFIED_URL}/analyze/unified",
823
+ json={"invalid": "data"}
824
+ )
825
+
826
+ if response.status_code in [400, 422]: # Expected validation error
827
+ error_tests.append(("Invalid Request Handling", True, "Properly rejected invalid data"))
828
+ else:
829
+ error_tests.append(("Invalid Request Handling", False, f"Unexpected status: {response.status_code}"))
830
+ except Exception as e:
831
+ error_tests.append(("Invalid Request Handling", False, str(e)))
832
+
833
+ # Test 2: Empty text analysis
834
+ try:
835
+ response = await self.make_request(
836
+ 'POST',
837
+ f"{UNIFIED_URL}/ner/analyze/text",
838
+ json={"text": "", "extract_relationships": True}
839
+ )
840
+
841
+ if response.status_code in [400, 422]: # Expected validation error
842
+ error_tests.append(("Empty Text Handling", True, "Properly rejected empty text"))
843
+ else:
844
+ result = response.json()
845
+ if not result.get("success"):
846
+ error_tests.append(("Empty Text Handling", True, "Failed gracefully"))
847
+ else:
848
+ error_tests.append(("Empty Text Handling", False, "Should have failed"))
849
+ except Exception as e:
850
+ error_tests.append(("Empty Text Handling", False, str(e)))
851
+
852
+ # Test 3: Invalid URL
853
+ try:
854
+ response = await self.make_request(
855
+ 'POST',
856
+ f"{UNIFIED_URL}/analyze/unified",
857
+ json={
858
+ "url": "https://invalid-url-that-does-not-exist-12345.com",
859
+ "extract_relationships": True
860
+ }
861
+ )
862
+
863
+ if response.status_code == 200:
864
+ result = response.json()
865
+ if not result.get("success"):
866
+ error_tests.append(("Invalid URL Handling", True, "Failed gracefully with invalid URL"))
867
+ else:
868
+ error_tests.append(("Invalid URL Handling", False, "Should have failed"))
869
+ else:
870
+ error_tests.append(("Invalid URL Handling", True, f"Rejected invalid URL (HTTP {response.status_code})"))
871
+ except Exception as e:
872
+ error_tests.append(("Invalid URL Handling", False, str(e)))
873
+
874
+ # Evaluate error handling tests
875
+ passed_error_tests = sum(1 for _, passed, _ in error_tests if passed)
876
+ total_error_tests = len(error_tests)
877
+
878
+ for test_name, passed, message in error_tests:
879
+ print(f" {'βœ…' if passed else '❌'} {test_name}: {message}")
880
+
881
+ self.result.add_result(
882
+ "Error Handling",
883
+ passed_error_tests >= total_error_tests * 0.8, # 80% success rate
884
+ f"Error tests passed: {passed_error_tests}/{total_error_tests}",
885
+ {"error_test_results": error_tests}
886
+ )
887
+
888
+ return passed_error_tests > 0
889
+
890
+ async def cleanup_test_data(self):
891
+ """Clean up test data"""
892
+ print("\n🧹 Cleaning up test data...")
893
+
894
+ cleanup_count = 0
895
+ cleanup_errors = 0
896
+
897
+ # Clean up NER analyses
898
+ for analysis_id in self.created_analyses:
899
+ try:
900
+ # Try direct service first
901
+ response = await self.make_request('DELETE', f"{NER_URL}/analysis/{analysis_id}")
902
+ if response.status_code in [200, 404]: # 404 is OK (already deleted)
903
+ cleanup_count += 1
904
+ else:
905
+ cleanup_errors += 1
906
+ except Exception as e:
907
+ cleanup_errors += 1
908
+ print(f" ⚠️ Failed to cleanup analysis {analysis_id[:8]}...: {e}")
909
+
910
+ # Clean up RAG documents
911
+ for document_id in self.created_documents:
912
+ try:
913
+ # Try through unified proxy
914
+ response = await self.make_request('DELETE', f"{UNIFIED_URL}/rag/documents/{document_id}")
915
+ if response.status_code in [200, 404]: # 404 is OK (already deleted)
916
+ cleanup_count += 1
917
+ else:
918
+ cleanup_errors += 1
919
+ except Exception as e:
920
+ cleanup_errors += 1
921
+ print(f" ⚠️ Failed to cleanup document {document_id[:8]}...: {e}")
922
+
923
+ if cleanup_count > 0:
924
+ print(f" βœ… Cleaned up {cleanup_count} test items")
925
+ if cleanup_errors > 0:
926
+ print(f" ⚠️ Failed to cleanup {cleanup_errors} items")
927
+
928
+ async def run_comprehensive_tests(self):
929
+ """Run all comprehensive unified system tests"""
930
+ print("πŸš€ Unified AI Services - Comprehensive Test Suite")
931
+ print("Testing: NER + OCR + RAG Integration with Unified Workflows")
932
+ print("=" * 80)
933
+
934
+ start_time = time.time()
935
+
936
+ # Test sequence
937
+ tests = [
938
+ ("Unified App Health", self.test_unified_app_health),
939
+ ("Individual Service Health", self.test_individual_service_health),
940
+ ("Unified Analysis (Text)", self.test_unified_analysis_text),
941
+ ("Unified Analysis (URL)", self.test_unified_analysis_url),
942
+ ("Combined Search", self.test_combined_search),
943
+ ("Service Proxies", self.test_service_proxies),
944
+ ("File Upload (Unified)", self.test_file_upload_unified),
945
+ ("Service Discovery", self.test_service_discovery),
946
+ ("System Performance", self.test_system_performance),
947
+ ("Error Handling", self.test_error_handling)
948
+ ]
949
+
950
+ for test_name, test_func in tests:
951
+ print(f"\n" + "=" * 80)
952
+ try:
953
+ await test_func()
954
+ except Exception as e:
955
+ print(f"❌ {test_name} failed with exception: {e}")
956
+ self.result.add_result(test_name, False, f"Exception: {e}")
957
+
958
+ # Cleanup
959
+ print(f"\n" + "=" * 80)
960
+ await self.cleanup_test_data()
961
+
962
+ # Final summary
963
+ total_time = time.time() - start_time
964
+ print(f"\n" + "=" * 80)
965
+ print("πŸ“Š UNIFIED SYSTEM COMPREHENSIVE TEST RESULTS")
966
+ print("=" * 80)
967
+
968
+ self.result.print_summary()
969
+
970
+ print(f"\nTEST EXECUTION:")
971
+ print(f"Total Time: {total_time:.2f} seconds")
972
+ print(f"Tests Created: NER analyses: {len(self.created_analyses)}, RAG documents: {len(self.created_documents)}")
973
+
974
+ passed = self.result.passed_tests
975
+ total = self.result.total_tests
976
+
977
+ if passed == total:
978
+ print(f"\nπŸŽ‰ ALL UNIFIED SYSTEM TESTS PASSED!")
979
+ print(f"βœ… Unified application is fully operational")
980
+ print(f"βœ… All services are integrated and working")
981
+ print(f"βœ… Combined workflows are functional")
982
+ print(f"βœ… Service proxies are working")
983
+ print(f"βœ… Error handling is robust")
984
+
985
+ print(f"\n🎯 UNIFIED SYSTEM CAPABILITIES VERIFIED:")
986
+ print(f" β€’ NER + OCR + RAG service integration")
987
+ print(f" β€’ Unified analysis workflows")
988
+ print(f" β€’ Combined search with NER enhancement")
989
+ print(f" β€’ Service proxy functionality")
990
+ print(f" β€’ Multi-language support")
991
+ print(f" β€’ Concurrent request handling")
992
+ print(f" β€’ Comprehensive error handling")
993
+ print(f" β€’ Real-time service health monitoring")
994
+
995
+ else:
996
+ print(f"\n⚠️ SOME UNIFIED SYSTEM TESTS FAILED")
997
+ print(f"❌ {self.result.failed_tests} out of {total} tests failed")
998
+
999
+ print(f"\nπŸ”§ TROUBLESHOOTING STEPS:")
1000
+ print(f"1. Check that all services are running:")
1001
+ print(f" β€’ NER Service: {NER_URL}/health")
1002
+ print(f" β€’ OCR Service: {OCR_URL}/health")
1003
+ print(f" β€’ RAG Service: {RAG_URL}/health")
1004
+ print(f" β€’ Unified App: {UNIFIED_URL}/health")
1005
+ print(f"2. Verify configuration in .env file")
1006
+ print(f"3. Check service logs for errors")
1007
+ print(f"4. Ensure all dependencies are installed")
1008
+ print(f"5. Verify database connectivity")
1009
+
1010
+ return passed == total
1011
+
1012
async def main():
    """Main test runner.

    Accepts the unified app URL as an optional first CLI argument,
    rebinds the module-level UNIFIED_URL accordingly, then runs the
    full suite and exits 0 on success, 1 on failure.
    """
    # FIX: the global declaration must precede any use of the name in
    # this scope. The original read UNIFIED_URL first and declared
    # `global UNIFIED_URL` afterwards, which is a SyntaxError
    # ("name is used prior to global declaration") and prevented the
    # module from even importing.
    global UNIFIED_URL

    if len(sys.argv) > 1:
        UNIFIED_URL = sys.argv[1]

    print("🧪 Unified AI Services - Comprehensive Test Suite")
    print(f"📡 Testing unified system at: {UNIFIED_URL}")
    print("🔗 Expected services:")
    print(f"   • NER Service: {NER_URL}")
    print(f"   • OCR Service: {OCR_URL}")
    print(f"   • RAG Service: {RAG_URL}")
    print(f"   • Unified App: {UNIFIED_URL}")

    print("\nMake sure the unified application is running before starting tests.")
    print("Start command: python app.py")

    # Wait for user confirmation before hammering the services.
    input("\nPress Enter to start unified system tests...")

    async with UnifiedSystemTester() as tester:
        success = await tester.run_comprehensive_tests()

        if success:
            print("\n🏆 UNIFIED SYSTEM VERIFICATION COMPLETE!")
            print("✅ All services are integrated and operational")
            print("✅ Combined workflows are working perfectly")
            print("✅ Ready for production deployment")

            sys.exit(0)
        else:
            print("\n🔧 UNIFIED SYSTEM NEEDS ATTENTION")
            print("❌ Some functionality is not working correctly")
            print("📋 Review the test results above for specific issues")

            sys.exit(1)
1053
+
1054
if __name__ == "__main__":
    # Script entry point: drive the async test runner to completion.
    asyncio.run(main())