Chirapath committed on
Commit
963ae98
Β·
verified Β·
1 Parent(s): 463f8b6

First draft coding project

Browse files
Files changed (10) hide show
  1. .env +120 -0
  2. README.md +470 -12
  3. app.py +808 -0
  4. configs.py +372 -0
  5. demo.py +527 -0
  6. gettingstart.md +485 -0
  7. manage_services.py +550 -0
  8. requirements.txt +45 -0
  9. setup.py +511 -0
  10. test.py +1055 -0
.env ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# =================================================================
# OCR SERVICE CONFIGURATION
# =================================================================
# SECURITY WARNING: this .env file with live API keys and passwords was
# committed to version control. Rotate every credential below and keep
# secrets out of the repository (use a secrets manager or an untracked
# local .env file).
# Get these from your Azure Portal -> Document Intelligence resource
5
+ AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://ocrservice256807.cognitiveservices.azure.com/
6
+ AZURE_DOCUMENT_INTELLIGENCE_KEY=3HVIJlvMH1AF5wuNSv0w1qd43AejgulvtdFInpFGJambLtr0DvISJQQJ99BGACqBBLyXJ3w3AAALACOG4NKs
7
+
8
+ # Server Configuration (Optional)
9
+ OCR_HOST=0.0.0.0
10
+ OCR_PORT=8400
11
+ OCR_DEBUG=True
12
+ OCR_LOG_LEVEL=INFO
13
+
14
+ # # CORS Configuration (Optional - for production)
15
+ # ALLOWED_ORIGINS=["http://localhost:3000", "https://yourdomain.com"]
16
+
17
+ # Rate Limiting (Optional - for production)
18
+ RATE_LIMIT_REQUESTS=100
19
+ RATE_LIMIT_WINDOW=3600
20
+
21
+ # Web Scraping Configuration (Optional)
22
+ MAX_IMAGES_PER_PAGE=10
23
+ REQUEST_TIMEOUT=30
24
+ USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
25
+
26
+ # ================================================================
27
+ # RAG SERVICE CONFIGURATION
28
+ # ================================================================
29
+
30
+ AZURE_OPENAI_ENDPOINT=https://ai-models-service256807.cognitiveservices.azure.com/
31
+ AZURE_OPENAI_KEY=3rz1lDxopNZktRyKpRbclNb8Evp5l3sgGy7ITNRgBdhKr1pGkY3OJQQJ99BGACYeBjFXJ3w3AAAAACOGcIO1
32
+ AZURE_OPENAI_DEPLOYMENT=text-embedding-3-small
33
+ AZURE_OPENAI_API_VERSION=2024-12-01-preview
34
+
35
+ PG_HOST=sbaipocpostgresql.postgres.database.azure.com
36
+ PG_PORT=5432
37
+ PG_DATABASE=vectorsearch
38
+ PG_USER=user
39
+ PG_PASSWORD="P@ssw0rd"
40
+ PG_SSL_MODE=require
41
+
42
+ OCR_SERVICE_URL=http://localhost:8400
43
+
44
+ RAG_HOST=0.0.0.0
45
+ RAG_PORT=8401
46
+ RAG_DEBUG=True
47
+ RAG_LOG_LEVEL=INFO
48
+
49
+ CHUNK_SIZE=1536
50
+ CHUNK_OVERLAP=100
51
+ MIN_CHUNK_SIZE=200
52
+
53
+ ALLOWED_ORIGINS=*
54
+
55
+ DEFAULT_SEARCH_LIMIT=10
56
+ DEFAULT_SIMILARITY_THRESHOLD=0.5
57
+ MAX_SEARCH_RESULTS=100
58
+
59
+ # Database connection pooling
60
+ DB_POOL_MIN_SIZE=2
61
+ DB_POOL_MAX_SIZE=20
62
+ DB_COMMAND_TIMEOUT=60
63
+
64
+ # Request timeouts (seconds)
65
+ REQUEST_TIMEOUT=30
66
+ EMBEDDING_TIMEOUT=60
67
+
68
+ SERVICE_VERSION=1.0.0
69
+ RAG_SERVICE_URL=http://localhost:8401
70
+ TEST_TIMEOUT=30
71
+
72
+ # =================================================================
73
+ # NER SERVICE CONFIGURATION
74
+ # =================================================================
75
+
76
+ # Server Configuration
77
+ NER_HOST=0.0.0.0
78
+ NER_PORT=8500
79
+ DEBUG=True
80
+ NER_LOG_LEVEL=INFO
81
+
82
+ # OCR Service Configuration (from your existing OCR service)
83
+ OCR_SERVICE_URL=http://localhost:8400
84
+
85
+ # DeepSeek API Configuration
86
+ # Get these from your Azure AI service or DeepSeek API
87
+ DEEPSEEK_ENDPOINT=https://ai-models-service256807.services.ai.azure.com/models
88
+ DEEPSEEK_API_KEY=3rz1lDxopNZktRyKpRbclNb8Evp5l3sgGy7ITNRgBdhKr1pGkY3OJQQJ99BGACYeBjFXJ3w3AAAAACOGcIO1
89
+ DEEPSEEK_MODEL=DeepSeek-R1-0528
90
+
91
+ # Azure OpenAI Configuration (for embeddings)
92
+ # Get these from your Azure OpenAI resource
93
+ AZURE_OPENAI_ENDPOINT=https://openaiservice2568.openai.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2023-05-15
94
+ AZURE_OPENAI_API_KEY=8CZSXFphWviu1KBpweiUntRKrJgYR2hApSUT76f5MlBsSjuvKulnJQQJ99BCACYeBjFXJ3w3AAABACOGc2vU
95
+ EMBEDDING_MODEL=text-embedding-3-large
96
+
97
+ # Azure Storage Configuration (SAS Authentication)
98
+ # Option 1: Use Storage Account URL + SAS Token (Recommended)
99
+ AZURE_STORAGE_ACCOUNT_URL=https://historylog256807.blob.core.windows.net/
100
+ AZURE_BLOB_SAS_TOKEN="sp=racwdli&st=2025-07-07T09:05:50Z&se=2026-07-06T17:05:50Z&sv=2024-11-04&sr=c&sig=TKDn8t6QLFnO70bstW%2FH%2FjrYGczNnFyOap1qG9RTPEU%3D"
101
+
102
+ # Option 2: Use complete SAS URL (Alternative - leave blank if using Option 1)
103
+ #AZURE_BLOB_SAS_URL=https://historylog256807.blob.core.windows.net/historylog?sp=racwdli&st=2025-07-07T09:05:50Z&se=2026-07-06T17:05:50Z&sv=2024-11-04&sr=c&sig=TKDn8t6QLFnO70bstW%2FH%2FjrYGczNnFyOap1qG9RTPEU%3D
104
+
105
+ BLOB_CONTAINER=historylog
106
+
107
+ # PostgreSQL Configuration (Azure Database for PostgreSQL flexible server)
108
+ POSTGRES_HOST=sbaipocpostgresql.postgres.database.azure.com
109
+ POSTGRES_PORT=5432
110
+ POSTGRES_USER=user
111
+ POSTGRES_PASSWORD="P@ssw0rd"
112
+ POSTGRES_DATABASE=postgres
113
+ AZURE_OPENAI_DEPLOYMENT_NAME=text-embedding-3-large
114
+
115
+ # Processing Configuration
116
+ MAX_FILE_SIZE=50 # Maximum file size in MB
117
+ REQUEST_TIMEOUT=300 # Request timeout in seconds
118
+
119
+ # CORS Configuration (optional)
120
+ ALLOWED_ORIGINS=*
README.md CHANGED
@@ -1,12 +1,470 @@
1
- ---
2
- title: SB PoC
3
- emoji: πŸ“š
4
- colorFrom: blue
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.35.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Unified AI Services
2
+
3
+ A comprehensive AI platform that integrates Named Entity Recognition (NER), Optical Character Recognition (OCR), and Retrieval-Augmented Generation (RAG) services into a unified application.
4
+
5
+ ## 🌟 Features
6
+
7
+ ### Core Services
8
+ - **NER Service** (Port 8500): Advanced named entity recognition with relationship extraction
9
+ - **OCR Service** (Port 8400): Document processing with Azure Document Intelligence
10
+ - **RAG Service** (Port 8401): Vector search and document retrieval
11
+ - **Unified App** (Port 8000): Coordinated workflows and service management
12
+
13
+ ### Key Capabilities
14
+ - βœ… Multi-language support (Thai + English)
15
+ - βœ… Complex relationship extraction
16
+ - βœ… Entity deduplication
17
+ - βœ… Graph database exports (Neo4j, GraphML, GEXF)
18
+ - βœ… Vector search with semantic similarity
19
+ - βœ… Document processing (PDF, images, text)
20
+ - βœ… Real-time service health monitoring
21
+ - βœ… Unified workflows combining all services
22
+ - βœ… Comprehensive API documentation
23
+
24
+ ## πŸš€ Quick Start
25
+
26
+ ### Prerequisites
27
+ - Python 3.8 or higher
28
+ - PostgreSQL with vector extension support
29
+ - Azure OpenAI account
30
+ - Azure Document Intelligence account
31
+ - DeepSeek API account (for advanced NER)
32
+
33
+ ### Automated Setup
34
+
35
+ 1. **Clone and navigate to the project directory**
36
+ ```bash
37
+ cd unified-ai-services
38
+ ```
39
+
40
+ 2. **Run the automated setup**
41
+ ```bash
42
+ python setup.py
43
+ ```
44
+
45
+ This will:
46
+ - Check your Python environment
47
+ - Create necessary directories
48
+ - Help you configure .env file
49
+ - Install dependencies
50
+ - Validate configuration
51
+ - Create startup scripts
52
+
53
+ 3. **Start the unified application**
54
+ ```bash
55
+ python app.py
56
+ ```
57
+
58
+ Or use the generated scripts:
59
+ - Windows: `start_services.bat`
60
+ - Unix/Linux/Mac: `./start_services.sh`
61
+
62
+ 4. **Run comprehensive tests**
63
+ ```bash
64
+ python test_unified.py
65
+ ```
66
+
67
+ Or use the generated scripts:
68
+ - Windows: `run_tests.bat`
69
+ - Unix/Linux/Mac: `./run_tests.sh`
70
+
71
+ ### Manual Setup
72
+
73
+ If you prefer manual setup:
74
+
75
+ 1. **Install dependencies**
76
+ ```bash
77
+ pip install -r requirements.txt
78
+ ```
79
+
80
+ 2. **Create .env file** (copy from .env.example)
81
+ ```bash
82
+ cp .env.example .env
83
+ # Edit .env with your configuration
84
+ ```
85
+
86
+ 3. **Set up directories**
87
+ ```bash
88
+ mkdir -p services exports logs temp tests data
89
+ ```
90
+
91
+ 4. **Place service files in the services directory**
92
+ ```
93
+ services/
94
+ β”œβ”€β”€ ner_service.py
95
+ β”œβ”€β”€ ocr_service.py
96
+ └── rag_service.py
97
+ ```
98
+
99
+ ## πŸ“ Project Structure
100
+
101
+ ```
102
+ unified-ai-services/
103
+ β”œβ”€β”€ app.py # Main unified application
104
+ β”œβ”€β”€ configs.py # Centralized configuration
105
+ β”œβ”€β”€ setup.py # Automated setup script
106
+ β”œβ”€β”€ requirements.txt # Python dependencies
107
+ β”œβ”€β”€ test_unified.py # Comprehensive test suite
108
+ β”œβ”€β”€ .env # Environment configuration
109
+ β”œβ”€β”€ services/ # Individual service files
110
+ β”‚ β”œβ”€β”€ ner_service.py # NER service implementation
111
+ β”‚ β”œβ”€β”€ ocr_service.py # OCR service implementation
112
+ β”‚ └── rag_service.py # RAG service implementation
113
+ β”œβ”€β”€ exports/ # Generated export files
114
+ β”œβ”€β”€ logs/ # Application logs
115
+ β”œβ”€β”€ temp/ # Temporary files
116
+ β”œβ”€β”€ tests/ # Additional test files
117
+ └── data/ # Data files
118
+ ```
119
+
120
+ ## βš™οΈ Configuration
121
+
122
+ ### Environment Variables
123
+
124
+ The system uses a `.env` file for configuration. Key variables include:
125
+
126
+ #### Server Configuration
127
+ ```bash
128
+ HOST=0.0.0.0
129
+ DEBUG=True
130
+ MAIN_PORT=8000
131
+ NER_PORT=8500
132
+ OCR_PORT=8400
133
+ RAG_PORT=8401
134
+ ```
135
+
136
+ #### Database Configuration
137
+ ```bash
138
+ POSTGRES_HOST=your-postgres-server.com
139
+ POSTGRES_PORT=5432
140
+ POSTGRES_USER=your-username
141
+ POSTGRES_PASSWORD=your-password
142
+ POSTGRES_DATABASE=postgres
143
+ ```
144
+
145
+ #### Azure OpenAI Configuration
146
+ ```bash
147
+ AZURE_OPENAI_ENDPOINT=https://your-openai.openai.azure.com/
148
+ AZURE_OPENAI_API_KEY=your-api-key
149
+ EMBEDDING_MODEL=text-embedding-3-large
150
+ ```
151
+
152
+ #### DeepSeek Configuration
153
+ ```bash
154
+ DEEPSEEK_ENDPOINT=https://your-deepseek-endpoint/
155
+ DEEPSEEK_API_KEY=your-deepseek-key
156
+ DEEPSEEK_MODEL=DeepSeek-R1-0528
157
+ ```
158
+
159
+ #### Azure Document Intelligence Configuration
160
+ ```bash
161
+ AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://your-di.cognitiveservices.azure.com/
162
+ AZURE_DOCUMENT_INTELLIGENCE_KEY=your-di-key
163
+ ```
164
+
165
+ #### Azure Storage Configuration
166
+ ```bash
167
+ AZURE_STORAGE_ACCOUNT_URL=https://yourstorage.blob.core.windows.net/
168
+ AZURE_BLOB_SAS_TOKEN=your-sas-token
169
+ BLOB_CONTAINER=historylog
170
+ ```
171
+
172
+ ## πŸ”§ API Documentation
173
+
174
+ Once running, access the interactive API documentation:
175
+ - **Unified API**: http://localhost:8000/docs
176
+ - **NER Service**: http://localhost:8500/docs
177
+ - **OCR Service**: http://localhost:8400/docs
178
+ - **RAG Service**: http://localhost:8401/docs
179
+
180
+ ## 🎯 API Usage Examples
181
+
182
+ ### 1. Unified Analysis (Text + RAG Indexing)
183
+
184
+ ```python
185
+ import httpx
186
+
187
+ async def unified_analysis():
188
+ data = {
189
+ "text": "Your text content here...",
190
+ "extract_relationships": True,
191
+ "include_embeddings": False,
192
+ "generate_graph_files": True,
193
+ "export_formats": ["neo4j", "json"],
194
+ "enable_rag_indexing": True,
195
+ "rag_title": "My Document",
196
+ "rag_keywords": ["keyword1", "keyword2"]
197
+ }
198
+
199
+ async with httpx.AsyncClient() as client:
200
+ response = await client.post("http://localhost:8000/analyze/unified", json=data)
201
+ return response.json()
202
+ ```
203
+
204
+ ### 2. Combined Search with NER Analysis
205
+
206
+ ```python
207
+ async def combined_search():
208
+ data = {
209
+ "query": "search query here",
210
+ "limit": 10,
211
+ "similarity_threshold": 0.2,
212
+ "include_ner_analysis": True
213
+ }
214
+
215
+ async with httpx.AsyncClient() as client:
216
+ response = await client.post("http://localhost:8000/search/combined", json=data)
217
+ return response.json()
218
+ ```
219
+
220
+ ### 3. File Upload Analysis
221
+
222
+ ```python
223
+ async def analyze_file():
224
+ files = {"file": ("document.pdf", open("document.pdf", "rb"), "application/pdf")}
225
+ data = {
226
+ "extract_relationships": "true",
227
+ "generate_graph_files": "true",
228
+ "export_formats": "neo4j,json"
229
+ }
230
+
231
+ async with httpx.AsyncClient() as client:
232
+ response = await client.post("http://localhost:8000/ner/analyze/file", files=files, data=data)
233
+ return response.json()
234
+ ```
235
+
236
+ ## πŸ§ͺ Testing
237
+
238
+ ### Comprehensive Test Suite
239
+
240
+ The project includes comprehensive tests covering:
241
+ - βœ… Service health checks
242
+ - βœ… Individual service functionality
243
+ - βœ… Unified workflow testing
244
+ - βœ… Service proxy functionality
245
+ - βœ… Error handling and resilience
246
+ - βœ… Performance testing
247
+ - βœ… File upload/download testing
248
+
249
+ Run tests with:
250
+ ```bash
251
+ python test_unified.py
252
+ ```
253
+
254
+ ### Individual Service Tests
255
+
256
+ Test individual services:
257
+ ```bash
258
+ # Test NER service
259
+ python test_ner.py
260
+
261
+ # Test RAG service
262
+ python test_rag.py
263
+ ```
264
+
265
+ ### Quick Health Check
266
+
267
+ ```bash
268
+ curl http://localhost:8000/health
269
+ ```
270
+
271
+ ## πŸ” Monitoring and Health Checks
272
+
273
+ ### Health Endpoints
274
+ - **Unified System**: `GET /health`
275
+ - **Individual Services**: `GET /ner/health`, `GET /ocr/health`, `GET /rag/health`
276
+ - **Detailed Status**: `GET /status`
277
+ - **Service Discovery**: `GET /services`
278
+
279
+ ### Monitoring Features
280
+ - Real-time service health monitoring
281
+ - Response time tracking
282
+ - Service uptime monitoring
283
+ - Error rate tracking
284
+ - Resource usage monitoring
285
+
286
+ ## πŸ“Š Service Architecture
287
+
288
+ ```mermaid
289
+ graph TB
290
+ Client[Client Applications]
291
+
292
+ subgraph "Unified AI Services (Port 8000)"
293
+ UA[Unified App]
294
+ Proxy[Service Proxies]
295
+ Health[Health Monitor]
296
+ end
297
+
298
+ subgraph "Core Services"
299
+ NER[NER Service<br/>Port 8500]
300
+ OCR[OCR Service<br/>Port 8400]
301
+ RAG[RAG Service<br/>Port 8401]
302
+ end
303
+
304
+ subgraph "External Services"
305
+ Azure[Azure Services]
306
+ DeepSeek[DeepSeek API]
307
+ DB[(PostgreSQL)]
308
+ end
309
+
310
+ Client --> UA
311
+ UA --> Proxy
312
+ Proxy --> NER
313
+ Proxy --> OCR
314
+ Proxy --> RAG
315
+
316
+ NER --> Azure
317
+ NER --> DeepSeek
318
+ NER --> DB
319
+
320
+ OCR --> Azure
321
+
322
+ RAG --> Azure
323
+ RAG --> DB
324
+ RAG --> OCR
325
+ ```
326
+
327
+ ## πŸ› οΈ Development
328
+
329
+ ### Adding New Features
330
+
331
+ 1. **Service Modifications**: Update individual service files in `services/`
332
+ 2. **Unified Workflows**: Modify `app.py` for new combined workflows
333
+ 3. **Configuration**: Update `configs.py` for new settings
334
+ 4. **Tests**: Add tests to `test_unified.py`
335
+
336
+ ### Debugging
337
+
338
+ 1. **Check Service Logs**: Services log to console
339
+ 2. **Health Checks**: Use `/health` endpoints
340
+ 3. **Configuration**: Run `python configs.py` to validate
341
+ 4. **Database**: Check PostgreSQL connectivity
342
+ 5. **Azure Services**: Verify API keys and endpoints
343
+
344
+ ### Service Management
345
+
346
+ Start individual services for development:
347
+ ```bash
348
+ # Start NER service only
349
+ cd services && python ner_service.py
350
+
351
+ # Start OCR service only
352
+ cd services && python ocr_service.py
353
+
354
+ # Start RAG service only
355
+ cd services && python rag_service.py
356
+ ```
357
+
358
+ ## 🚨 Troubleshooting
359
+
360
+ ### Common Issues
361
+
362
+ #### 1. Services Won't Start
363
+ - Check port availability: `netstat -an | grep :8000`
364
+ - Verify Python dependencies: `pip list`
365
+ - Check .env configuration: `python configs.py`
366
+
367
+ #### 2. Database Connection Issues
368
+ - Verify PostgreSQL is running
369
+ - Check connection string in .env
370
+ - Test connectivity: `python -c "import asyncio, asyncpg; asyncio.run(asyncpg.connect('your-connection-string'))"`
371
+
372
+ #### 3. Azure Service Issues
373
+ - Verify API keys and endpoints
374
+ - Check Azure service status
375
+ - Review rate limits and quotas
376
+
377
+ #### 4. Performance Issues
378
+ - Monitor resource usage: `top` or Task Manager
379
+ - Check database performance
380
+ - Review log files for errors
381
+
382
+ ### Error Codes
383
+
384
+ - **500**: Internal service error
385
+ - **503**: Service unavailable
386
+ - **400**: Bad request (check input data)
387
+ - **422**: Validation error
388
+ - **404**: Endpoint not found
389
+
390
+ ## πŸ“ˆ Performance Optimization
391
+
392
+ ### Recommended Settings
393
+
394
+ #### Production Configuration
395
+ ```bash
396
+ DEBUG=False
397
+ MAX_FILE_SIZE=50
398
+ REQUEST_TIMEOUT=300
399
+ CHUNK_SIZE=1000
400
+ CHUNK_OVERLAP=200
401
+ ```
402
+
403
+ #### Database Optimization
404
+ - Use connection pooling
405
+ - Configure appropriate indexes
406
+ - Monitor query performance
407
+ - Regular maintenance
408
+
409
+ #### Service Optimization
410
+ - Enable caching where appropriate
411
+ - Use async operations
412
+ - Optimize batch processing
413
+ - Monitor memory usage
414
+
415
+ ## πŸ” Security Considerations
416
+
417
+ ### API Security
418
+ - Implement authentication/authorization as needed
419
+ - Use HTTPS in production
420
+ - Validate all input data
421
+ - Rate limiting
422
+
423
+ ### Data Security
424
+ - Secure database connections (SSL)
425
+ - Encrypt sensitive data
426
+ - Regular security updates
427
+ - Monitor access logs
428
+
429
+ ### Azure Security
430
+ - Rotate API keys regularly
431
+ - Use managed identities where possible
432
+ - Monitor usage and costs
433
+ - Follow Azure security best practices
434
+
435
+ ## πŸ“ License
436
+
437
+ This project is licensed under the MIT License - see the LICENSE file for details.
438
+
439
+ ## 🀝 Contributing
440
+
441
+ 1. Fork the repository
442
+ 2. Create a feature branch
443
+ 3. Make your changes
444
+ 4. Add tests for new functionality
445
+ 5. Run the test suite
446
+ 6. Submit a pull request
447
+
448
+ ## πŸ“ž Support
449
+
450
+ For support and questions:
451
+ 1. Check this README for common issues
452
+ 2. Review the test suite for usage examples
453
+ 3. Check service logs for error details
454
+ 4. Verify configuration with `python configs.py`
455
+
456
+ ## 🎯 Roadmap
457
+
458
+ ### Current Version (1.0.0)
459
+ - βœ… Unified service integration
460
+ - βœ… Comprehensive testing
461
+ - βœ… Multi-language support
462
+ - βœ… Graph database exports
463
+
464
+ ### Future Enhancements
465
+ - πŸ”„ Advanced caching mechanisms
466
+ - πŸ”„ Enhanced monitoring and analytics
467
+ - πŸ”„ Additional export formats
468
+ - πŸ”„ Improved error recovery
469
+ - πŸ”„ Performance optimizations
470
+ - πŸ”„ Additional language support
app.py ADDED
@@ -0,0 +1,808 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified AI Services Application
4
+ Coordinates NER, OCR, and RAG services with combined workflows
5
+ """
6
+
7
+ import asyncio
8
+ import subprocess
9
+ import signal
10
+ import sys
11
+ import os
12
+ import time
13
+ import json
14
+ import logging
15
+ from pathlib import Path
16
+ from typing import Dict, List, Optional, Any, Union
17
+ from contextlib import asynccontextmanager
18
+ from datetime import datetime
19
+ import tempfile
20
+ import io
21
+
22
+ import httpx
23
+ import uvicorn
24
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form, BackgroundTasks, Query
25
+ from fastapi.middleware.cors import CORSMiddleware
26
+ from fastapi.responses import FileResponse, StreamingResponse
27
+ from pydantic import BaseModel, HttpUrl
28
+ import psutil
29
+
30
+ # Import our configuration
31
+ from configs import get_config, validate_environment
32
+
33
+ # Get configuration
34
config = get_config()

# Root logging configuration shared by the unified app and its service proxies.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Child processes spawned by start_service(), keyed by service name
# ("ner" / "ocr" / "rag").  Consulted by get_service_status() and
# stop_all_services().
service_processes: Dict[str, subprocess.Popen] = {}
# Last observed health-check result per service, updated by start_service().
service_health: Dict[str, bool] = {}
46
+
47
+ # Pydantic Models for Unified API
48
class ServiceStatus(BaseModel):
    """Health/status snapshot for a single backing service (see get_service_status)."""

    name: str  # service key: "ner", "ocr", or "rag"
    status: str  # "running" if the health probe passed, otherwise "down"
    port: int  # local port the service listens on
    health: bool  # result of the most recent /health probe
    uptime: Optional[float] = None  # seconds since process start; None if not managed here
    response_time: Optional[float] = None  # health-probe round trip, in seconds
55
+
56
class UnifiedAnalysisRequest(BaseModel):
    """Request body for the unified NER + optional RAG-indexing workflow.

    Exactly one of ``text`` or ``url`` is expected as the input source
    (not enforced here — presumably validated by the endpoint; confirm).
    """

    text: Optional[str] = None  # raw text to analyze
    url: Optional[HttpUrl] = None  # alternatively, a URL to fetch and analyze
    extract_relationships: bool = True  # run relationship extraction in the NER pass
    include_embeddings: bool = True
    include_summary: bool = True
    generate_graph_files: bool = True  # produce downloadable graph exports
    export_formats: List[str] = ["neo4j", "json", "graphml"]
    enable_rag_indexing: bool = False  # also index the document in the RAG service
    rag_title: Optional[str] = None  # title to store with the indexed document
    rag_keywords: Optional[List[str]] = None
    rag_metadata: Optional[Dict[str, Any]] = None  # free-form metadata passed to RAG
68
+
69
class CombinedSearchRequest(BaseModel):
    """Request body for RAG vector search optionally enriched with NER analysis."""

    query: str  # natural-language search query
    limit: int = 10  # maximum number of search hits to return
    similarity_threshold: float = 0.2  # minimum cosine-similarity score for a hit
    include_ner_analysis: bool = True  # run NER over the retrieved results
    ner_export_formats: List[str] = ["json"]
75
+
76
class UnifiedResponse(BaseModel):
    """Envelope returned by the unified endpoints; optional sections are
    populated only when the corresponding service was invoked."""

    success: bool
    service_calls: List[str]  # names of backing services invoked for this request
    ner_analysis: Optional[Dict[str, Any]] = None  # raw NER service payload
    rag_document: Optional[Dict[str, Any]] = None  # RAG indexing result, if enabled
    search_results: Optional[Dict[str, Any]] = None  # RAG search payload, if searched
    processing_time: float  # total wall-clock time in seconds
    error: Optional[str] = None  # populated when success is False
84
+
85
+ # Service Management Functions
86
async def start_service(service_name: str, script_path: str, port: int) -> bool:
    """Launch *script_path* as a child process and wait for it to become healthy.

    Args:
        service_name: Key used to register the process in ``service_processes``.
        script_path: Path to the service's entry-point script.
        port: TCP port the service is expected to serve ``/health`` on.

    Returns:
        True if the service answers its health check within ~30 s, or if the
        port was already occupied (an external instance is assumed to be
        running); False on spawn failure or health-check timeout.
    """
    try:
        logger.info(f"🚀 Starting {service_name} service on port {port}")

        # If something already listens on the port, assume an external copy
        # of the service is running and do not spawn a duplicate.
        if is_port_in_use(port):
            logger.warning(f"Port {port} is already in use. Assuming {service_name} is already running.")
            return True

        # Spawn in its own process group so stop_all_services() can signal
        # the whole group (CTRL_BREAK_EVENT on Windows, SIGTERM elsewhere).
        if sys.platform == "win32":
            process = subprocess.Popen(
                [sys.executable, script_path],
                creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
            )
        else:
            process = subprocess.Popen(
                [sys.executable, script_path],
                preexec_fn=os.setsid,
            )

        service_processes[service_name] = process

        # Poll the health endpoint once per second for up to 30 seconds.
        for _ in range(30):
            await asyncio.sleep(1)
            if await check_service_health(service_name, port):
                logger.info(f"✅ {service_name} service started successfully")
                service_health[service_name] = True
                return True

        logger.error(f"❌ {service_name} service failed to start within timeout")
        # Record the failure so health reporting reflects the timed-out start
        # (the original left the entry unset on this path).
        service_health[service_name] = False
        return False

    except Exception as e:
        logger.error(f"❌ Failed to start {service_name} service: {e}")
        return False
122
+
123
def is_port_in_use(port: int) -> bool:
    """Return True if any local socket is bound to *port*.

    ``psutil.net_connections()`` can require elevated privileges on some
    platforms (e.g. macOS); on failure we conservatively report the port
    as free so service startup can proceed.
    """
    try:
        # conn.laddr is an empty tuple for sockets with no local address,
        # so guard before reading .port — the original's unguarded access
        # raised AttributeError, which the bare except turned into a false
        # "port free" answer for the whole scan.
        return any(
            conn.laddr and conn.laddr.port == port
            for conn in psutil.net_connections()
        )
    except (psutil.Error, OSError):
        return False
132
+
133
async def check_service_health(service_name: str, port: int) -> bool:
    """Return True if ``GET http://localhost:<port>/health`` answers 200.

    Any connection error or timeout counts as unhealthy.  (The original
    used a bare ``except:``, which also swallowed KeyboardInterrupt and
    SystemExit — narrowed to httpx's error hierarchy.)
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"http://localhost:{port}/health",
                timeout=5.0
            )
            return response.status_code == 200
    except httpx.HTTPError:
        return False
144
+
145
async def get_service_status(service_name: str, port: int) -> ServiceStatus:
    """Probe one service's /health endpoint and build a ServiceStatus snapshot.

    ``response_time`` measures the health probe's round trip; ``uptime`` is
    derived from the child process's create time when this app spawned it,
    and is None for externally-started services.
    """
    start_time = time.time()
    health = await check_service_health(service_name, port)
    response_time = time.time() - start_time

    uptime = None
    if service_name in service_processes:
        process = service_processes[service_name]
        if process.poll() is None:  # Process is running
            try:
                uptime = time.time() - psutil.Process(process.pid).create_time()
            except (psutil.Error, OSError):
                # Process vanished or access denied between poll() and the
                # psutil lookup — report unknown uptime rather than crash.
                uptime = None

    return ServiceStatus(
        name=service_name,
        status="running" if health else "down",
        port=port,
        health=health,
        uptime=uptime,
        response_time=response_time
    )
168
+
169
async def stop_all_services():
    """Gracefully stop every child process in ``service_processes``.

    Each service gets a group-wide termination signal, then up to 10 s to
    exit before being force-killed.  Errors stopping one service are logged
    and do not prevent the others from being stopped.
    """
    logger.info("🛑 Stopping all services...")

    for service_name, process in service_processes.items():
        try:
            if process.poll() is None:  # Process is running
                logger.info(f"Stopping {service_name}...")

                # Signal the whole process group created in start_service():
                # CTRL_BREAK_EVENT on Windows, SIGTERM to the group elsewhere.
                if sys.platform == "win32":
                    process.send_signal(signal.CTRL_BREAK_EVENT)
                else:
                    os.killpg(os.getpgid(process.pid), signal.SIGTERM)

                # Wait for graceful shutdown; escalate to SIGKILL on timeout.
                try:
                    process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    logger.warning(f"Force killing {service_name}")
                    process.kill()

                logger.info(f"✅ {service_name} stopped")
        except Exception as e:
            # Best-effort shutdown: log and continue with the next service.
            logger.error(f"Error stopping {service_name}: {e}")
193
+
194
+ # Service Communication Functions
195
async def _call_service(base_url: str, label: str, endpoint: str,
                        method: str, **kwargs) -> Dict[str, Any]:
    """Proxy one HTTP request to a backing service.

    Shared implementation for the three ``call_*_service`` wrappers, which
    were previously triplicated verbatim.

    Args:
        base_url: Service root URL (e.g. ``config.NER_SERVICE_URL``).
        label: Human-readable service name used in error messages.
        endpoint: Path appended to *base_url*.
        method: HTTP verb.
        **kwargs: Forwarded to ``httpx.AsyncClient.request`` (json=, files=, ...).

    Returns:
        The decoded JSON body on HTTP 200.

    Raises:
        HTTPException: with the downstream status/text on non-200 responses,
            or 503 when the service cannot be reached.
    """
    try:
        # 300 s timeout: document analysis on the backing services can be slow.
        async with httpx.AsyncClient(timeout=300.0) as client:
            url = f"{base_url}{endpoint}"
            response = await client.request(method, url, **kwargs)

            if response.status_code == 200:
                return response.json()
            # Propagate the downstream error verbatim so callers see the
            # backing service's status code and message.
            raise HTTPException(status_code=response.status_code, detail=response.text)

    except httpx.RequestError as e:
        raise HTTPException(status_code=503, detail=f"{label} service unavailable: {e}")


async def call_ner_service(endpoint: str, method: str = "GET", **kwargs) -> Dict[str, Any]:
    """Call NER service endpoint"""
    return await _call_service(config.NER_SERVICE_URL, "NER", endpoint, method, **kwargs)


async def call_ocr_service(endpoint: str, method: str = "GET", **kwargs) -> Dict[str, Any]:
    """Call OCR service endpoint"""
    return await _call_service(config.OCR_SERVICE_URL, "OCR", endpoint, method, **kwargs)


async def call_rag_service(endpoint: str, method: str = "GET", **kwargs) -> Dict[str, Any]:
    """Call RAG service endpoint"""
    return await _call_service(config.RAG_SERVICE_URL, "RAG", endpoint, method, **kwargs)
238
+ raise HTTPException(status_code=503, detail=f"RAG service unavailable: {e}")
239
+
240
+ # Application Lifecycle
241
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: start the backing services, yield, then stop them.

    Startup order matters: OCR first (RAG and NER depend on it), then RAG,
    then NER.  Raises RuntimeError — aborting app startup — if the
    environment is invalid or no service could be started.
    """
    logger.info("🚀 Starting Unified AI Services Application")

    # Print configuration summary
    config.print_configuration_summary()

    # Validate environment before spawning anything.
    if not validate_environment():
        logger.error("❌ Environment validation failed. Please check your configuration.")
        raise RuntimeError("Invalid environment configuration")

    # (name, script path, port) for each managed service, in start order.
    service_definitions = [
        ("ocr", "services/ocr_service.py", config.ocr.PORT),
        ("rag", "services/rag_service.py", config.rag.PORT),
        ("ner", "services/ner_service.py", config.ner.PORT)
    ]

    # Start services; a missing script or a failed start is tolerated as
    # long as at least one service comes up.
    started_services = []
    for service_name, script_path, port in service_definitions:
        if os.path.exists(script_path):
            success = await start_service(service_name, script_path, port)
            if success:
                started_services.append(service_name)
            else:
                logger.error(f"Failed to start {service_name} service")
        else:
            logger.warning(f"Service script not found: {script_path}")

    if len(started_services) == 0:
        logger.error("❌ No services could be started")
        raise RuntimeError("Failed to start any services")

    logger.info(f"✅ Started {len(started_services)} services: {', '.join(started_services)}")

    # Yield control to the application; everything after runs at shutdown.
    yield

    # Cleanup: terminate all child processes we spawned.
    await stop_all_services()
    logger.info("🏁 Unified AI Services Application shutdown complete")
285
+
286
+ # FastAPI Application
287
app = FastAPI(
    title="Unified AI Services",
    description="Coordinated NER, OCR, and RAG services with combined workflows",
    version="1.0.0",
    lifespan=lifespan
)

# CORS configuration: ALLOWED_ORIGINS is either "*" (allow everything) or a
# JSON list of origins, e.g. '["http://localhost:3000"]'.
allowed_origins = config.ner.ALLOWED_ORIGINS
if allowed_origins == "*":
    # CORSMiddleware expects a sequence of origin strings; the original
    # passed the bare string "*", which only worked by accident.
    allowed_origins = ["*"]
else:
    try:
        allowed_origins = json.loads(allowed_origins)
    except (json.JSONDecodeError, TypeError):
        # Malformed origin list: fall back to allow-all rather than
        # rejecting every request.  NOTE(review): in production a hard
        # failure might be preferable — confirm desired behavior.
        allowed_origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
309
+
310
+ # Main API Endpoints
311
+ @app.get("/")
312
+ async def root():
313
+ return {
314
+ "message": "Unified AI Services",
315
+ "version": "1.0.0",
316
+ "services": {
317
+ "ner": f"{config.NER_SERVICE_URL}",
318
+ "ocr": f"{config.OCR_SERVICE_URL}",
319
+ "rag": f"{config.RAG_SERVICE_URL}"
320
+ },
321
+ "unified_endpoints": {
322
+ "status": "/status",
323
+ "analyze": "/analyze",
324
+ "search": "/search",
325
+ "combined": "/combined/*"
326
+ }
327
+ }
328
+
329
+ @app.get("/health")
330
+ async def unified_health():
331
+ """Unified health check for all services"""
332
+ services = [
333
+ ("ner", config.ner.PORT),
334
+ ("ocr", config.ocr.PORT),
335
+ ("rag", config.rag.PORT)
336
+ ]
337
+
338
+ service_statuses = []
339
+ overall_healthy = True
340
+
341
+ for service_name, port in services:
342
+ status = await get_service_status(service_name, port)
343
+ service_statuses.append(status.dict())
344
+ if not status.health:
345
+ overall_healthy = False
346
+
347
+ return {
348
+ "status": "healthy" if overall_healthy else "degraded",
349
+ "services": service_statuses,
350
+ "timestamp": datetime.utcnow().isoformat(),
351
+ "configuration": {
352
+ "ner_url": config.NER_SERVICE_URL,
353
+ "ocr_url": config.OCR_SERVICE_URL,
354
+ "rag_url": config.RAG_SERVICE_URL
355
+ }
356
+ }
357
+
358
+ @app.get("/status")
359
+ async def detailed_status():
360
+ """Detailed status of all services"""
361
+ services = [
362
+ ("ner", config.ner.PORT),
363
+ ("ocr", config.ocr.PORT),
364
+ ("rag", config.rag.PORT)
365
+ ]
366
+
367
+ detailed_statuses = {}
368
+
369
+ for service_name, port in services:
370
+ try:
371
+ # Get service-specific health data
372
+ async with httpx.AsyncClient() as client:
373
+ response = await client.get(f"http://localhost:{port}/health", timeout=10.0)
374
+ if response.status_code == 200:
375
+ detailed_statuses[service_name] = response.json()
376
+ else:
377
+ detailed_statuses[service_name] = {"status": "error", "error": f"HTTP {response.status_code}"}
378
+ except Exception as e:
379
+ detailed_statuses[service_name] = {"status": "unreachable", "error": str(e)}
380
+
381
+ return {
382
+ "unified_app": {
383
+ "status": "running",
384
+ "port": config.MAIN_PORT,
385
+ "uptime": time.time() - start_time if 'start_time' in globals() else 0
386
+ },
387
+ "services": detailed_statuses,
388
+ "configuration_valid": validate_environment()
389
+ }
390
+
391
+ # Unified Analysis Endpoints
392
+ @app.post("/analyze/unified")
393
+ async def unified_analysis(request: UnifiedAnalysisRequest):
394
+ """Unified analysis combining NER and optional RAG indexing"""
395
+ start_time = time.time()
396
+ service_calls = []
397
+
398
+ try:
399
+ # Step 1: NER Analysis
400
+ ner_data = {
401
+ "text": request.text,
402
+ "url": str(request.url) if request.url else None,
403
+ "extract_relationships": request.extract_relationships,
404
+ "include_embeddings": request.include_embeddings,
405
+ "include_summary": request.include_summary,
406
+ "generate_graph_files": request.generate_graph_files,
407
+ "export_formats": request.export_formats
408
+ }
409
+
410
+ # Remove None values
411
+ ner_data = {k: v for k, v in ner_data.items() if v is not None}
412
+
413
+ if request.text:
414
+ ner_result = await call_ner_service("/analyze/text", "POST", json=ner_data)
415
+ service_calls.append("ner_text")
416
+ elif request.url:
417
+ ner_result = await call_ner_service("/analyze/url", "POST", json=ner_data)
418
+ service_calls.append("ner_url")
419
+ else:
420
+ raise HTTPException(status_code=400, detail="Either text or url must be provided")
421
+
422
+ # Step 2: Optional RAG indexing
423
+ rag_result = None
424
+ if request.enable_rag_indexing and ner_result.get("success"):
425
+ try:
426
+ rag_data = {
427
+ "title": request.rag_title or f"NER Analysis {ner_result.get('analysis_id', 'unknown')}",
428
+ "keywords": request.rag_keywords or ner_result.get("keywords", []),
429
+ "metadata": {
430
+ **(request.rag_metadata or {}),
431
+ "ner_analysis_id": ner_result.get("analysis_id"),
432
+ "entity_count": len(ner_result.get("entities", [])),
433
+ "relationship_count": len(ner_result.get("relationships", []))
434
+ }
435
+ }
436
+
437
+ if request.text:
438
+ # Create temporary file for RAG service
439
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
440
+ f.write(request.text)
441
+ temp_path = f.name
442
+
443
+ try:
444
+ with open(temp_path, 'rb') as f:
445
+ files = {"file": ("ner_analysis.txt", f, "text/plain")}
446
+ form_data = {
447
+ "title": rag_data["title"],
448
+ "keywords": json.dumps(rag_data["keywords"]),
449
+ "metadata": json.dumps(rag_data["metadata"])
450
+ }
451
+
452
+ async with httpx.AsyncClient(timeout=300.0) as client:
453
+ response = await client.post(
454
+ f"{config.RAG_SERVICE_URL}/documents/upload",
455
+ files=files,
456
+ data=form_data
457
+ )
458
+ if response.status_code == 200:
459
+ rag_result = response.json()
460
+ service_calls.append("rag_upload")
461
+ finally:
462
+ os.unlink(temp_path)
463
+
464
+ elif request.url:
465
+ async with httpx.AsyncClient(timeout=300.0) as client:
466
+ response = await client.post(
467
+ f"{config.RAG_SERVICE_URL}/documents/url",
468
+ json={
469
+ "url": str(request.url),
470
+ **rag_data,
471
+ "extract_images": True
472
+ }
473
+ )
474
+ if response.status_code == 200:
475
+ rag_result = response.json()
476
+ service_calls.append("rag_url")
477
+
478
+ except Exception as e:
479
+ logger.warning(f"RAG indexing failed: {e}")
480
+ # Continue without RAG result
481
+
482
+ processing_time = time.time() - start_time
483
+
484
+ return UnifiedResponse(
485
+ success=True,
486
+ service_calls=service_calls,
487
+ ner_analysis=ner_result,
488
+ rag_document=rag_result,
489
+ processing_time=processing_time
490
+ )
491
+
492
+ except Exception as e:
493
+ processing_time = time.time() - start_time
494
+ logger.error(f"Unified analysis failed: {e}")
495
+
496
+ return UnifiedResponse(
497
+ success=False,
498
+ service_calls=service_calls,
499
+ processing_time=processing_time,
500
+ error=str(e)
501
+ )
502
+
503
+ @app.post("/search/combined")
504
+ async def combined_search(request: CombinedSearchRequest):
505
+ """Combined search using RAG with optional NER analysis of results"""
506
+ start_time = time.time()
507
+ service_calls = []
508
+
509
+ try:
510
+ # Step 1: RAG Search
511
+ search_data = {
512
+ "query": request.query,
513
+ "limit": request.limit,
514
+ "similarity_threshold": request.similarity_threshold
515
+ }
516
+
517
+ search_result = await call_rag_service("/search", "POST", json=search_data)
518
+ service_calls.append("rag_search")
519
+
520
+ # Step 2: Optional NER analysis of search results
521
+ ner_results = []
522
+ if request.include_ner_analysis and search_result.get("results"):
523
+ for i, result in enumerate(search_result["results"][:3]): # Analyze top 3 results
524
+ chunk_content = result.get("chunk", {}).get("content", "")
525
+ if chunk_content:
526
+ try:
527
+ ner_data = {
528
+ "text": chunk_content,
529
+ "extract_relationships": True,
530
+ "include_embeddings": False,
531
+ "include_summary": False,
532
+ "generate_graph_files": False,
533
+ "export_formats": request.ner_export_formats
534
+ }
535
+
536
+ ner_result = await call_ner_service("/analyze/text", "POST", json=ner_data)
537
+ ner_results.append({
538
+ "result_index": i,
539
+ "ner_analysis": ner_result
540
+ })
541
+ service_calls.append(f"ner_text_{i}")
542
+
543
+ except Exception as e:
544
+ logger.warning(f"NER analysis failed for result {i}: {e}")
545
+
546
+ processing_time = time.time() - start_time
547
+
548
+ return UnifiedResponse(
549
+ success=True,
550
+ service_calls=service_calls,
551
+ search_results={
552
+ **search_result,
553
+ "ner_analyses": ner_results
554
+ },
555
+ processing_time=processing_time
556
+ )
557
+
558
+ except Exception as e:
559
+ processing_time = time.time() - start_time
560
+ logger.error(f"Combined search failed: {e}")
561
+
562
+ return UnifiedResponse(
563
+ success=False,
564
+ service_calls=service_calls,
565
+ processing_time=processing_time,
566
+ error=str(e)
567
+ )
568
+
569
+ # Service Proxy Endpoints
570
+ @app.api_route("/ner/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
571
+ async def ner_proxy(path: str, request):
572
+ """Proxy requests to NER service"""
573
+ try:
574
+ async with httpx.AsyncClient(timeout=300.0) as client:
575
+ url = f"{config.NER_SERVICE_URL}/{path}"
576
+
577
+ # Forward the request
578
+ if request.method == "GET":
579
+ response = await client.get(url, params=request.query_params)
580
+ else:
581
+ # Handle different content types
582
+ content_type = request.headers.get("content-type", "")
583
+
584
+ if "multipart/form-data" in content_type:
585
+ # Handle file uploads
586
+ form = await request.form()
587
+ files = {}
588
+ data = {}
589
+
590
+ for key, value in form.items():
591
+ if hasattr(value, 'read'): # File-like object
592
+ files[key] = (value.filename, await value.read(), value.content_type)
593
+ else:
594
+ data[key] = value
595
+
596
+ response = await client.request(request.method, url, files=files, data=data)
597
+ else:
598
+ # Handle JSON/other content
599
+ body = await request.body()
600
+ response = await client.request(
601
+ request.method,
602
+ url,
603
+ content=body,
604
+ headers={k: v for k, v in request.headers.items() if k.lower() != "host"}
605
+ )
606
+
607
+ # Return response
608
+ return response.json() if response.headers.get("content-type", "").startswith("application/json") else response.text
609
+
610
+ except httpx.RequestError as e:
611
+ raise HTTPException(status_code=503, detail=f"NER service unavailable: {e}")
612
+
613
+ @app.api_route("/ocr/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
614
+ async def ocr_proxy(path: str, request):
615
+ """Proxy requests to OCR service"""
616
+ try:
617
+ async with httpx.AsyncClient(timeout=300.0) as client:
618
+ url = f"{config.OCR_SERVICE_URL}/{path}"
619
+
620
+ # Forward the request
621
+ if request.method == "GET":
622
+ response = await client.get(url, params=request.query_params)
623
+ else:
624
+ # Handle different content types
625
+ content_type = request.headers.get("content-type", "")
626
+
627
+ if "multipart/form-data" in content_type:
628
+ # Handle file uploads
629
+ form = await request.form()
630
+ files = {}
631
+ data = {}
632
+
633
+ for key, value in form.items():
634
+ if hasattr(value, 'read'): # File-like object
635
+ files[key] = (value.filename, await value.read(), value.content_type)
636
+ else:
637
+ data[key] = value
638
+
639
+ response = await client.request(request.method, url, files=files, data=data)
640
+ else:
641
+ # Handle JSON/other content
642
+ body = await request.body()
643
+ response = await client.request(
644
+ request.method,
645
+ url,
646
+ content=body,
647
+ headers={k: v for k, v in request.headers.items() if k.lower() != "host"}
648
+ )
649
+
650
+ # Return response
651
+ return response.json() if response.headers.get("content-type", "").startswith("application/json") else response.text
652
+
653
+ except httpx.RequestError as e:
654
+ raise HTTPException(status_code=503, detail=f"OCR service unavailable: {e}")
655
+
656
+ @app.api_route("/rag/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
657
+ async def rag_proxy(path: str, request):
658
+ """Proxy requests to RAG service"""
659
+ try:
660
+ async with httpx.AsyncClient(timeout=300.0) as client:
661
+ url = f"{config.RAG_SERVICE_URL}/{path}"
662
+
663
+ # Forward the request
664
+ if request.method == "GET":
665
+ response = await client.get(url, params=request.query_params)
666
+ else:
667
+ # Handle different content types
668
+ content_type = request.headers.get("content-type", "")
669
+
670
+ if "multipart/form-data" in content_type:
671
+ # Handle file uploads
672
+ form = await request.form()
673
+ files = {}
674
+ data = {}
675
+
676
+ for key, value in form.items():
677
+ if hasattr(value, 'read'): # File-like object
678
+ files[key] = (value.filename, await value.read(), value.content_type)
679
+ else:
680
+ data[key] = value
681
+
682
+ response = await client.request(request.method, url, files=files, data=data)
683
+ else:
684
+ # Handle JSON/other content
685
+ body = await request.body()
686
+ response = await client.request(
687
+ request.method,
688
+ url,
689
+ content=body,
690
+ headers={k: v for k, v in request.headers.items() if k.lower() != "host"}
691
+ )
692
+
693
+ # Return response
694
+ return response.json() if response.headers.get("content-type", "").startswith("application/json") else response.text
695
+
696
+ except httpx.RequestError as e:
697
+ raise HTTPException(status_code=503, detail=f"RAG service unavailable: {e}")
698
+
699
+ # Convenience endpoints (direct service access)
700
+ @app.get("/analyze/text")
701
+ @app.post("/analyze/text")
702
+ async def analyze_text_direct(request=None):
703
+ """Direct access to NER text analysis"""
704
+ if request:
705
+ return await call_ner_service("/analyze/text", "POST", json=await request.json())
706
+ else:
707
+ return {"message": "Use POST method with text data"}
708
+
709
+ @app.get("/documents")
710
+ async def list_documents():
711
+ """Direct access to RAG document listing"""
712
+ return await call_rag_service("/documents", "GET")
713
+
714
+ @app.post("/search")
715
+ async def search_direct(request):
716
+ """Direct access to RAG search"""
717
+ return await call_rag_service("/search", "POST", json=await request.json())
718
+
719
+ # Utility endpoints
720
+ @app.get("/services")
721
+ async def list_services():
722
+ """List all available services and their endpoints"""
723
+ return {
724
+ "services": {
725
+ "ner": {
726
+ "url": config.NER_SERVICE_URL,
727
+ "description": "Named Entity Recognition with relationship extraction",
728
+ "endpoints": [
729
+ "/analyze/text", "/analyze/file", "/analyze/url", "/analyze/multi",
730
+ "/download/{analysis_id}/{file_type}", "/statistics", "/entity-types", "/relationship-types"
731
+ ]
732
+ },
733
+ "ocr": {
734
+ "url": config.OCR_SERVICE_URL,
735
+ "description": "Optical Character Recognition with document processing",
736
+ "endpoints": [
737
+ "/ocr/upload", "/ocr/url", "/ocr/analyze"
738
+ ]
739
+ },
740
+ "rag": {
741
+ "url": config.RAG_SERVICE_URL,
742
+ "description": "Retrieval-Augmented Generation with vector search",
743
+ "endpoints": [
744
+ "/documents/upload", "/documents/url", "/search", "/documents", "/documents/{id}"
745
+ ]
746
+ }
747
+ },
748
+ "unified": {
749
+ "url": f"http://localhost:{config.MAIN_PORT}",
750
+ "description": "Unified interface for combined workflows",
751
+ "endpoints": [
752
+ "/analyze/unified", "/search/combined", "/ner/*", "/ocr/*", "/rag/*"
753
+ ]
754
+ }
755
+ }
756
+
757
+ # Signal handlers for graceful shutdown
758
+ def signal_handler(signum, frame):
759
+ """Handle shutdown signals"""
760
+ logger.info(f"Received signal {signum}, initiating graceful shutdown...")
761
+ asyncio.create_task(stop_all_services())
762
+
763
+ # Register signal handlers
764
+ signal.signal(signal.SIGINT, signal_handler)
765
+ signal.signal(signal.SIGTERM, signal_handler)
766
+
767
+ # Store start time for uptime calculation
768
+ start_time = time.time()
769
+
770
+ if __name__ == "__main__":
771
+ print("πŸš€ Starting Unified AI Services Application")
772
+ print("=" * 50)
773
+
774
+ # Validate configuration before starting
775
+ if not validate_environment():
776
+ print("οΏ½οΏ½οΏ½ Configuration validation failed!")
777
+ print("Please check your .env file and ensure all required services are configured.")
778
+ sys.exit(1)
779
+
780
+ print(f"🌐 Main application will run on: http://{config.MAIN_HOST}:{config.MAIN_PORT}")
781
+ print(f"πŸ“Š Services will be started automatically:")
782
+ print(f" β€’ NER Service: http://localhost:{config.ner.PORT}")
783
+ print(f" β€’ OCR Service: http://localhost:{config.ocr.PORT}")
784
+ print(f" β€’ RAG Service: http://localhost:{config.rag.PORT}")
785
+ print("")
786
+ print("🎯 Available endpoints:")
787
+ print(" β€’ Main API: /")
788
+ print(" β€’ Health Check: /health")
789
+ print(" β€’ Unified Analysis: /analyze/unified")
790
+ print(" β€’ Combined Search: /search/combined")
791
+ print(" β€’ Service Proxies: /ner/*, /ocr/*, /rag/*")
792
+ print("")
793
+ print("πŸ“– API Documentation: /docs")
794
+ print("")
795
+
796
+ try:
797
+ uvicorn.run(
798
+ "app:app",
799
+ host=config.MAIN_HOST,
800
+ port=config.MAIN_PORT,
801
+ reload=config.ner.DEBUG,
802
+ log_level="info"
803
+ )
804
+ except KeyboardInterrupt:
805
+ print("\nπŸ›‘ Shutting down gracefully...")
806
+ finally:
807
+ # Cleanup will be handled by the lifespan context manager
808
+ pass
configs.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Centralized Configuration Management for Unified AI Services
4
+ Manages configuration for NER, OCR, and RAG services
5
+ """
6
+
7
+ import os
8
+ import logging
9
+ from pathlib import Path
10
+ from typing import Optional, Dict, Any, List
11
+ from dotenv import load_dotenv
12
+
13
+ # Load environment variables
14
+ env_path = Path(__file__).parent / '.env'
15
+ if env_path.exists():
16
+ load_dotenv(dotenv_path=env_path)
17
+ else:
18
+ load_dotenv() # Load from default location
19
+
20
+ # Setup logging
21
+ logging.basicConfig(
22
+ level=getattr(logging, os.getenv("LOG_LEVEL", "INFO").upper()),
23
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
24
+ )
25
+ logger = logging.getLogger(__name__)
26
+
27
+ class BaseConfig:
28
+ """Base configuration class with common settings"""
29
+
30
+ def __init__(self):
31
+ # Server Configuration
32
+ self.HOST = os.getenv("HOST", "0.0.0.0")
33
+ self.DEBUG = os.getenv("DEBUG", "False").lower() == "true"
34
+
35
+ # Database Configuration (shared by NER and RAG)
36
+ self.POSTGRES_HOST = os.getenv("POSTGRES_HOST", "")
37
+ self.POSTGRES_PORT = int(os.getenv("POSTGRES_PORT", "5432"))
38
+ self.POSTGRES_USER = os.getenv("POSTGRES_USER", "")
39
+ self.POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "")
40
+ self.POSTGRES_DATABASE = os.getenv("POSTGRES_DATABASE", "postgres")
41
+
42
+ # Azure OpenAI Configuration (shared by NER and RAG)
43
+ self.AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "")
44
+ self.AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY", "")
45
+ self.EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-3-large")
46
+ self.AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-3-large")
47
+
48
+ # Azure Storage Configuration (shared by NER and RAG)
49
+ self.AZURE_STORAGE_ACCOUNT_URL = os.getenv("AZURE_STORAGE_ACCOUNT_URL", "")
50
+ self.AZURE_BLOB_SAS_TOKEN = os.getenv("AZURE_BLOB_SAS_TOKEN", "")
51
+ self.BLOB_CONTAINER = os.getenv("BLOB_CONTAINER", "historylog")
52
+
53
+ # Processing Configuration
54
+ self.MAX_FILE_SIZE = int(os.getenv("MAX_FILE_SIZE", "50")) * 1024 * 1024 # Convert MB to bytes
55
+ self.REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "300"))
56
+
57
+ # CORS Configuration
58
+ self.ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "*")
59
+
60
+ def validate_azure_openai(self) -> bool:
61
+ """Validate Azure OpenAI configuration"""
62
+ return bool(
63
+ self.AZURE_OPENAI_ENDPOINT and
64
+ self.AZURE_OPENAI_API_KEY and
65
+ self.AZURE_OPENAI_ENDPOINT != "YOUR_AZURE_OPENAI_ENDPOINT" and
66
+ self.AZURE_OPENAI_API_KEY != "YOUR_AZURE_OPENAI_KEY"
67
+ )
68
+
69
+ def validate_postgres(self) -> bool:
70
+ """Validate PostgreSQL configuration"""
71
+ return bool(
72
+ self.POSTGRES_HOST and
73
+ self.POSTGRES_USER and
74
+ self.POSTGRES_PASSWORD and
75
+ self.POSTGRES_DATABASE
76
+ )
77
+
78
+ def validate_azure_storage(self) -> bool:
79
+ """Validate Azure Storage configuration"""
80
+ return bool(
81
+ self.AZURE_STORAGE_ACCOUNT_URL and
82
+ self.AZURE_BLOB_SAS_TOKEN
83
+ )
84
+
85
+ class NERConfig(BaseConfig):
86
+ """Configuration for NER Service"""
87
+
88
+ def __init__(self):
89
+ super().__init__()
90
+ self.PORT = int(os.getenv("NER_PORT", "8500"))
91
+
92
+ # DeepSeek Configuration
93
+ self.DEEPSEEK_ENDPOINT = os.getenv("DEEPSEEK_ENDPOINT", "")
94
+ self.DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
95
+ self.DEEPSEEK_MODEL = os.getenv("DEEPSEEK_MODEL", "DeepSeek-R1-0528")
96
+
97
+ # OCR Service Configuration
98
+ self.OCR_SERVICE_URL = os.getenv("OCR_SERVICE_URL", "http://localhost:8400")
99
+
100
+ # NER Specific Settings
101
+ self.MAX_TEXT_LENGTH = 100000 # 100KB
102
+ self.SUPPORTED_TEXT_FORMATS = {'.txt', '.doc', '.docx', '.rtf'}
103
+ self.SUPPORTED_OCR_FORMATS = {'.pdf', '.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif'}
104
+
105
+ # Entity and Relationship Types
106
+ self.ENTITY_TYPES = [
107
+ "PERSON", "ORGANIZATION", "LOCATION", "DATE", "TIME", "MONEY", "PRODUCT", "EVENT",
108
+ "VEHICLE", "SUSPICIOUS_OBJECT", "ILLEGAL_ACTIVITY", "EVIDENCE", "ILLEGAL_ITEM",
109
+ "WEAPON", "DRUG", "CHEMICAL", "DOCUMENT", "PHONE_NUMBER", "ADDRESS", "EMAIL"
110
+ ]
111
+
112
+ self.RELATIONSHIP_TYPES = [
113
+ # Standard relationships
114
+ "works_for", "founded", "located_in", "part_of", "associated_with", "owns", "manages",
115
+ "leads", "reports_to", "collaborates_with", "partners_with", "supplies_to", "acquires",
116
+ "invests_in", "headquartered_in", "operates_in", "born_in", "lives_in", "studied_at",
117
+ "graduated_from", "worked_at", "visited", "attended", "participated_in", "sponsored",
118
+ "developed", "created", "invented", "discovered", "published", "authored", "edited",
119
+ # Thai relationships
120
+ "ΰΈ—ΰΈ³ΰΈ‡ΰΈ²ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ", "ΰΈΰΉˆΰΈ­ΰΈ•ΰΈ±ΰΉ‰ΰΈ‡", "ΰΈ•ΰΈ±ΰΉ‰ΰΈ‡ΰΈ­ΰΈ’ΰΈΉΰΉˆΰΈ—ΰΈ΅ΰΉˆ", "ΰΉ€ΰΈ›ΰΉ‡ΰΈ™ΰΈͺΰΉˆΰΈ§ΰΈ™ΰΈ«ΰΈ™ΰΈΆΰΉˆΰΈ‡ΰΈ‚ΰΈ­ΰΈ‡", "ΰΉ€ΰΈΰΈ΅ΰΉˆΰΈ’ΰΈ§ΰΈ‚ΰΉ‰ΰΈ­ΰΈ‡ΰΈΰΈ±ΰΈš", "ΰΉ€ΰΈ›ΰΉ‡ΰΈ™ΰΉ€ΰΈˆΰΉ‰ΰΈ²ΰΈ‚ΰΈ­ΰΈ‡", "ΰΈˆΰΈ±ΰΈ”ΰΈΰΈ²ΰΈ£",
121
+ "ΰΈ™ΰΈ³ΰΉ‚ΰΈ”ΰΈ’", "ΰΈ£ΰΈ²ΰΈ’ΰΈ‡ΰΈ²ΰΈ™ΰΈ•ΰΉˆΰΈ­", "ΰΈ£ΰΉˆΰΈ§ΰΈ‘ΰΈ‡ΰΈ²ΰΈ™ΰΈΰΈ±ΰΈš", "ΰΉ€ΰΈ›ΰΉ‡ΰΈ™ΰΈžΰΈ±ΰΈ™ΰΈ˜ΰΈ‘ΰΈ΄ΰΈ•ΰΈ£ΰΈΰΈ±ΰΈš", "ΰΈˆΰΈ±ΰΈ”ΰΈ«ΰΈ²ΰΉƒΰΈ«ΰΉ‰", "ΰΈ‹ΰΈ·ΰΉ‰ΰΈ­ΰΈΰΈ΄ΰΈˆΰΈΰΈ²ΰΈ£", "ΰΈ₯ΰΈ‡ΰΈ—ΰΈΈΰΈ™ΰΉƒΰΈ™",
122
+ "ΰΈͺΰΈ³ΰΈ™ΰΈ±ΰΈΰΈ‡ΰΈ²ΰΈ™ΰΉƒΰΈ«ΰΈΰΉˆΰΈ—ΰΈ΅ΰΉˆ", "ดำเนินการใน", "ΰΉ€ΰΈΰΈ΄ΰΈ”ΰΈ—ΰΈ΅ΰΉˆ", "ΰΈ­ΰΈ²ΰΈ¨ΰΈ±ΰΈ’ΰΈ­ΰΈ’ΰΈΉΰΉˆΰΈ—ΰΈ΅ΰΉˆ", "ΰΈ¨ΰΈΆΰΈΰΈ©ΰΈ²ΰΈ—ΰΈ΅ΰΉˆ", "จบการศยกษาจาก",
123
+ # Law enforcement relationships
124
+ "arrested_by", "investigated_by", "confiscated_from", "used_in", "evidence_of", "witness_of",
125
+ "victim_of", "suspect_in", "charged_with", "convicted_of", "sentenced_by", "defended_by",
126
+ "prosecuted_by", "testified_against", "alibi_for", "found_at", "seized_from", "linked_to",
127
+ "ΰΈˆΰΈ±ΰΈšΰΈΰΈΈΰΈ‘ΰΉ‚ΰΈ”ΰΈ’", "ΰΈͺอบΰΈͺΰΈ§ΰΈ™ΰΉ‚ΰΈ”ΰΈ’", "ΰΈ’ΰΈΆΰΈ”ΰΈˆΰΈ²ΰΈ", "ΰΉƒΰΈŠΰΉ‰ΰΉƒΰΈ™ΰΈΰΈ²ΰΈ£", "ΰΈ«ΰΈ₯ักฐานของ", "ΰΈžΰΈ’ΰΈ²ΰΈ™ΰΉƒΰΈ™", "ΰΉ€ΰΈ«ΰΈ’ΰΈ·ΰΉˆΰΈ­ΰΈ‚ΰΈ­ΰΈ‡",
128
+ "ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈͺΰΈ‡ΰΈͺΰΈ±ΰΈ’ΰΉƒΰΈ™", "ถูกตั้งข้อหา", "ถูกตัดΰΈͺΰΈ΄ΰΈ™", "ΰΈ–ΰΈΉΰΈΰΈžΰΈ΄ΰΈžΰΈ²ΰΈΰΈ©ΰΈ²ΰΉ‚ΰΈ”ΰΈ’", "ΰΈ•ΰΉˆΰΈ­ΰΈͺΰΈΉΰΉ‰ΰΈ„ΰΈ”ΰΈ΅ΰΉ‚ΰΈ”ΰΈ’", "ΰΈŸΰΉ‰ΰΈ­ΰΈ‡ΰΈ£ΰΉ‰ΰΈ­ΰΈ‡ΰΉ‚ΰΈ”ΰΈ’",
129
+ "ΰΉƒΰΈ«ΰΉ‰ΰΈΰΈ²ΰΈ£ΰΈ•ΰΉˆΰΈ­ΰΈ•ΰΉ‰ΰΈ²ΰΈ™", "เป็นข้อแก้ตัวΰΈͺำหรับ", "ΰΈžΰΈšΰΈ—ΰΈ΅ΰΉˆ", "ΰΈ’ΰΈΆΰΈ”ΰΈˆΰΈ²ΰΈ", "ΰΉ€ΰΈŠΰΈ·ΰΉˆΰΈ­ΰΈ‘ΰΉ‚ΰΈ’ΰΈ‡ΰΈΰΈ±ΰΈš",
130
+ # Criminal relationships
131
+ "possess_illegal", "transport_illegal", "sell_illegal", "buy_illegal", "hide_evidence",
132
+ "plan_crime", "commit_crime", "flee_from", "escape_from", "hide_at", "meet_with",
133
+ "communicate_with", "threaten", "blackmail", "bribe", "corrupt", "money_launder",
134
+ "ΰΈ„ΰΈ£ΰΈ­ΰΈšΰΈ„ΰΈ£ΰΈ­ΰΈ‡ΰΈ‚ΰΈ­ΰΈ‡ΰΈœΰΈ΄ΰΈ”ΰΈΰΈŽΰΈ«ΰΈ‘ΰΈ²ΰΈ’", "ΰΈ‚ΰΈ™ΰΈͺΰΉˆΰΈ‡ΰΈ‚ΰΈ­ΰΈ‡ΰΈœΰΈ΄ΰΈ”ΰΈΰΈŽΰΈ«ΰΈ‘ΰΈ²ΰΈ’", "ΰΈ‚ΰΈ²ΰΈ’ΰΈ‚ΰΈ­ΰΈ‡ΰΈœΰΈ΄ΰΈ”ΰΈΰΈŽΰΈ«ΰΈ‘ΰΈ²ΰΈ’", "ΰΈ‹ΰΈ·ΰΉ‰ΰΈ­ΰΈ‚ΰΈ­ΰΈ‡ΰΈœΰΈ΄ΰΈ”ΰΈΰΈŽΰΈ«ΰΈ‘ΰΈ²ΰΈ’",
135
+ "ΰΈ‹ΰΉˆΰΈ­ΰΈ™ΰΈ«ΰΈ₯ักฐาน", "ΰΈ§ΰΈ²ΰΈ‡ΰΉΰΈœΰΈ™ΰΈ­ΰΈ²ΰΈŠΰΈΰΈ²ΰΈΰΈ£ΰΈ£ΰΈ‘", "ΰΈΰΈ£ΰΈ°ΰΈ—ΰΈ³ΰΈ­ΰΈ²ΰΈŠΰΈΰΈ²ΰΈΰΈ£ΰΈ£ΰΈ‘", "ΰΈ«ΰΈ₯ΰΈšΰΈ«ΰΈ™ΰΈ΅ΰΈˆΰΈ²ΰΈ", "ΰΉΰΈ­ΰΈšΰΈ‹ΰΉˆΰΈ­ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ",
136
+ "ΰΈžΰΈšΰΈ›ΰΈ°ΰΈΰΈ±ΰΈš", "ΰΈ•ΰΈ΄ΰΈ”ΰΈ•ΰΉˆΰΈ­ΰΈΰΈ±ΰΈš", "ΰΈ‚ΰΉˆΰΈ‘ΰΈ‚ΰΈΉΰΉˆ", "แบΰΈ₯ΰΉ‡ΰΈ„ΰΉ€ΰΈ‘ΰΈ₯์", "ΰΉƒΰΈ«ΰΉ‰ΰΈͺΰΈ΄ΰΈ™ΰΈšΰΈ™", "ΰΈ—ΰΈΈΰΈˆΰΈ£ΰΈ΄ΰΈ•", "ΰΈŸΰΈ­ΰΈΰΉ€ΰΈ‡ΰΈ΄ΰΈ™"
137
+ ]
138
+
139
+ def validate_deepseek(self) -> bool:
140
+ """Validate DeepSeek configuration"""
141
+ return bool(
142
+ self.DEEPSEEK_ENDPOINT and
143
+ self.DEEPSEEK_API_KEY and
144
+ self.DEEPSEEK_ENDPOINT != "YOUR_DEEPSEEK_ENDPOINT" and
145
+ self.DEEPSEEK_API_KEY != "YOUR_DEEPSEEK_API_KEY"
146
+ )
147
+
148
+ class OCRConfig(BaseConfig):
149
+ """Configuration for OCR Service"""
150
+
151
+ def __init__(self):
152
+ super().__init__()
153
+ self.PORT = int(os.getenv("OCR_PORT", "8400"))
154
+
155
+ # Azure Document Intelligence Configuration
156
+ self.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT", "")
157
+ self.AZURE_DOCUMENT_INTELLIGENCE_KEY = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_KEY", "")
158
+
159
+ # Web scraping configuration
160
+ self.MAX_IMAGES_PER_PAGE = int(os.getenv("MAX_IMAGES_PER_PAGE", "10"))
161
+ self.USER_AGENT = os.getenv("USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
162
+
163
+ def validate_azure_document_intelligence(self) -> bool:
164
+ """Validate Azure Document Intelligence configuration"""
165
+ return bool(
166
+ self.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT and
167
+ self.AZURE_DOCUMENT_INTELLIGENCE_KEY and
168
+ self.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT != "YOUR_FORM_RECOGNIZER_ENDPOINT" and
169
+ self.AZURE_DOCUMENT_INTELLIGENCE_KEY != "YOUR_FORM_RECOGNIZER_KEY"
170
+ )
171
+
172
+ class RAGConfig(BaseConfig):
173
+ """Configuration for RAG Service"""
174
+
175
+ def __init__(self):
176
+ super().__init__()
177
+ self.PORT = int(os.getenv("RAG_PORT", "8401"))
178
+
179
+ # OCR Service Configuration
180
+ self.OCR_SERVICE_URL = os.getenv("OCR_SERVICE_URL", "http://localhost:8400")
181
+
182
+ # PostgreSQL Configuration (specific to RAG)
183
+ self.PG_HOST = self.POSTGRES_HOST
184
+ self.PG_PORT = self.POSTGRES_PORT
185
+ self.PG_DATABASE = os.getenv("PG_DATABASE", "vectorsearch") # RAG uses different default DB
186
+ self.PG_USER = self.POSTGRES_USER
187
+ self.PG_PASSWORD = self.POSTGRES_PASSWORD
188
+ self.PG_SSL_MODE = os.getenv("PG_SSL_MODE", "require")
189
+
190
+ # Chunking Configuration
191
+ self.CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "1000"))
192
+ self.CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", "200"))
193
+ self.MIN_CHUNK_SIZE = int(os.getenv("MIN_CHUNK_SIZE", "50"))
194
+
195
+ # Azure OpenAI Configuration (RAG specific)
196
+ self.AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT", "text-embedding-3-small")
197
+ self.AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION", "2024-12-01-preview")
198
+
199
+ class UnifiedConfig:
200
+ """Unified configuration for all services"""
201
+
202
+ def __init__(self):
203
+ self.ner = NERConfig()
204
+ self.ocr = OCRConfig()
205
+ self.rag = RAGConfig()
206
+
207
+ # Main app configuration
208
+ self.MAIN_PORT = int(os.getenv("MAIN_PORT", "8000"))
209
+ self.MAIN_HOST = os.getenv("MAIN_HOST", "0.0.0.0")
210
+
211
+ # Service URLs (for inter-service communication)
212
+ self.NER_SERVICE_URL = f"http://localhost:{self.ner.PORT}"
213
+ self.OCR_SERVICE_URL = f"http://localhost:{self.ocr.PORT}"
214
+ self.RAG_SERVICE_URL = f"http://localhost:{self.rag.PORT}"
215
+
216
+ # Service Health Check Configuration
217
+ self.HEALTH_CHECK_TIMEOUT = 30
218
+ self.HEALTH_CHECK_RETRIES = 3
219
+ self.HEALTH_CHECK_INTERVAL = 5
220
+
221
+ # Load balancing and routing
222
+ self.SERVICE_WEIGHTS = {
223
+ "ner": 1.0,
224
+ "ocr": 1.0,
225
+ "rag": 1.0
226
+ }
227
+
228
+ def validate_all(self) -> Dict[str, Dict[str, bool]]:
229
+ """Validate all service configurations"""
230
+ validation_results = {
231
+ "ner": {
232
+ "deepseek": self.ner.validate_deepseek(),
233
+ "azure_openai": self.ner.validate_azure_openai(),
234
+ "postgres": self.ner.validate_postgres(),
235
+ "azure_storage": self.ner.validate_azure_storage()
236
+ },
237
+ "ocr": {
238
+ "azure_document_intelligence": self.ocr.validate_azure_document_intelligence()
239
+ },
240
+ "rag": {
241
+ "azure_openai": self.rag.validate_azure_openai(),
242
+ "postgres": self.rag.validate_postgres()
243
+ }
244
+ }
245
+ return validation_results
246
+
247
+ def get_service_config(self, service_name: str) -> BaseConfig:
248
+ """Get configuration for a specific service"""
249
+ service_configs = {
250
+ "ner": self.ner,
251
+ "ocr": self.ocr,
252
+ "rag": self.rag
253
+ }
254
+ return service_configs.get(service_name.lower())
255
+
256
+ def get_database_config(self) -> Dict[str, str]:
257
+ """Get database configuration for services that need it"""
258
+ return {
259
+ "host": self.ner.POSTGRES_HOST,
260
+ "port": str(self.ner.POSTGRES_PORT),
261
+ "user": self.ner.POSTGRES_USER,
262
+ "password": self.ner.POSTGRES_PASSWORD,
263
+ "database": self.ner.POSTGRES_DATABASE,
264
+ "ssl_mode": getattr(self.rag, 'PG_SSL_MODE', 'require')
265
+ }
266
+
267
+ def get_azure_openai_config(self) -> Dict[str, str]:
268
+ """Get Azure OpenAI configuration for services that need it"""
269
+ return {
270
+ "endpoint": self.ner.AZURE_OPENAI_ENDPOINT,
271
+ "api_key": self.ner.AZURE_OPENAI_API_KEY,
272
+ "embedding_model": self.ner.EMBEDDING_MODEL,
273
+ "deployment_name": self.ner.AZURE_OPENAI_DEPLOYMENT_NAME
274
+ }
275
+
276
+ def print_configuration_summary(self):
277
+ """Print a summary of all configurations"""
278
+ print("πŸ”§ Configuration Summary")
279
+ print("=" * 50)
280
+
281
+ # Validate all configurations
282
+ validation_results = self.validate_all()
283
+
284
+ # NER Service
285
+ print(f"πŸ“ NER Service (Port {self.ner.PORT}):")
286
+ print(f" DeepSeek: {'βœ…' if validation_results['ner']['deepseek'] else '❌'}")
287
+ print(f" Azure OpenAI: {'βœ…' if validation_results['ner']['azure_openai'] else '❌'}")
288
+ print(f" PostgreSQL: {'βœ…' if validation_results['ner']['postgres'] else '❌'}")
289
+ print(f" Azure Storage: {'βœ…' if validation_results['ner']['azure_storage'] else '❌'}")
290
+ print(f" OCR Service URL: {self.ner.OCR_SERVICE_URL}")
291
+
292
+ # OCR Service
293
+ print(f"\nπŸ” OCR Service (Port {self.ocr.PORT}):")
294
+ print(f" Azure Document Intelligence: {'βœ…' if validation_results['ocr']['azure_document_intelligence'] else '❌'}")
295
+ print(f" Max File Size: {self.ocr.MAX_FILE_SIZE / (1024*1024):.0f} MB")
296
+
297
+ # RAG Service
298
+ print(f"\n🧠 RAG Service (Port {self.rag.PORT}):")
299
+ print(f" Azure OpenAI: {'βœ…' if validation_results['rag']['azure_openai'] else '❌'}")
300
+ print(f" PostgreSQL: {'βœ…' if validation_results['rag']['postgres'] else '❌'}")
301
+ print(f" OCR Service URL: {self.rag.OCR_SERVICE_URL}")
302
+ print(f" Chunk Size: {self.rag.CHUNK_SIZE}")
303
+
304
+ # Main App
305
+ print(f"\n🌐 Main App (Port {self.MAIN_PORT}):")
306
+ print(f" NER Service: {self.NER_SERVICE_URL}")
307
+ print(f" OCR Service: {self.OCR_SERVICE_URL}")
308
+ print(f" RAG Service: {self.RAG_SERVICE_URL}")
309
+
310
+ # Database Configuration
311
+ print(f"\nπŸ—„οΈ Database Configuration:")
312
+ print(f" Host: {self.ner.POSTGRES_HOST}")
313
+ print(f" Port: {self.ner.POSTGRES_PORT}")
314
+ print(f" User: {self.ner.POSTGRES_USER}")
315
+ print(f" NER Database: {self.ner.POSTGRES_DATABASE}")
316
+ print(f" RAG Database: {self.rag.PG_DATABASE}")
317
+
318
+ # Critical Issues
319
+ all_validations = []
320
+ for service, validations in validation_results.items():
321
+ all_validations.extend(validations.values())
322
+
323
+ if not all(all_validations):
324
+ print(f"\n⚠️ CONFIGURATION ISSUES DETECTED:")
325
+ for service, validations in validation_results.items():
326
+ for component, is_valid in validations.items():
327
+ if not is_valid:
328
+ print(f" ❌ {service.upper()}: {component} not configured")
329
+ else:
330
+ print(f"\nβœ… All configurations are valid!")
331
+
332
+ # Global configuration instance
333
+ config = UnifiedConfig()
334
+
335
+ def get_config() -> UnifiedConfig:
336
+ """Get the global configuration instance"""
337
+ return config
338
+
339
+ def validate_environment() -> bool:
340
+ """Validate the entire environment configuration"""
341
+ validation_results = config.validate_all()
342
+
343
+ # Check critical components
344
+ critical_components = [
345
+ validation_results['ner']['azure_openai'],
346
+ validation_results['ner']['postgres'],
347
+ validation_results['ocr']['azure_document_intelligence'],
348
+ validation_results['rag']['azure_openai'],
349
+ validation_results['rag']['postgres']
350
+ ]
351
+
352
+ return all(critical_components)
353
+
354
+ if __name__ == "__main__":
355
+ """Test configuration loading and validation"""
356
+ print("πŸ§ͺ Testing Configuration Loading")
357
+ print("=" * 40)
358
+
359
+ try:
360
+ config.print_configuration_summary()
361
+
362
+ if validate_environment():
363
+ print("\nπŸŽ‰ Environment validation passed!")
364
+ print("All critical services are properly configured.")
365
+ else:
366
+ print("\n❌ Environment validation failed!")
367
+ print("Some critical services are not properly configured.")
368
+ print("Please check your .env file and update missing values.")
369
+
370
+ except Exception as e:
371
+ print(f"\n❌ Configuration loading failed: {e}")
372
+ logger.error(f"Configuration error: {e}")
demo.py ADDED
@@ -0,0 +1,527 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified AI Services - Interactive Demo
4
+ Demonstrates the capabilities of the unified system with real examples
5
+ """
6
+
7
+ import asyncio
8
+ import httpx
9
+ import json
10
+ import time
11
+ import sys
12
+ from typing import Dict, Any, Optional
13
+
14
# --- Demo configuration -----------------------------------------------
# Target of the demo; may be overridden via the first CLI argument.
UNIFIED_URL = "http://localhost:8000"
# Per-request timeout (seconds) for the shared httpx client.
TIMEOUT = 60

# --- Demo data --------------------------------------------------------
# Three sample documents exercising Thai-only, English-only and
# mixed-language analysis paths.
DEMO_TEXTS = {
    "thai_crime": """
    ΰΈ„ΰΈ”ΰΈ΅ΰΈ­ΰΈ²ΰΈΰΈ²ΰΈ—ΰΈ΅ΰΉˆΰΈͺำคัญ: ΰΈΰΈ²ΰΈ£ΰΈ†ΰΈ²ΰΈ•ΰΈΰΈ£ΰΈ£ΰΈ‘ΰΈ—ΰΈ΅ΰΉˆΰΈΰΈ£ΰΈΈΰΈ‡ΰΉ€ΰΈ—ΰΈžΰΈ‘ΰΈ«ΰΈ²ΰΈ™ΰΈ„ΰΈ£

    ΰΉ€ΰΈ‘ΰΈ·ΰΉˆΰΈ­ΰΈ§ΰΈ±ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ 15 ΰΈ•ΰΈΈΰΈ₯ΰΈ²ΰΈ„ΰΈ‘ 2567 ΰΉ€ΰΈ§ΰΈ₯ΰΈ² 14:30 ΰΈ™.
    ΰΈ™ΰΈ²ΰΈ’ΰΈͺฑชาฒ ΰΉƒΰΈˆΰΈ”ΰΈ΅ ΰΈ­ΰΈ²ΰΈ’ΰΈΈ 45 ΰΈ›ΰΈ΅ ΰΈ­ΰΈ²ΰΈŠΰΈ΅ΰΈžΰΈ™ΰΈ±ΰΈΰΈ˜ΰΈΈΰΈ£ΰΈΰΈ΄ΰΈˆ
    ΰΈ–ΰΈΉΰΈΰΈžΰΈšΰΉ€ΰΈͺΰΈ΅ΰΈ’ΰΈŠΰΈ΅ΰΈ§ΰΈ΄ΰΈ•ΰΈ—ΰΈ΅ΰΉˆΰΈ„ΰΈ­ΰΈ™ΰΉ‚ΰΈ”ΰΈ‘ΰΈ΄ΰΉ€ΰΈ™ΰΈ΅ΰΈ’ΰΈ‘ ΰΉ€ΰΈ”ΰΈ­ΰΈ° ΰΈ£ΰΈ΄ΰΉ€ΰΈ§ΰΈ­ΰΈ£ΰΉŒ ΰΈ‹ΰΈ΄ΰΈ•ΰΈ΅ΰΉ‰ ΰΈŠΰΈ±ΰΉ‰ΰΈ™ 25

    ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈͺΰΈ‡ΰΈͺΰΈ±ΰΈ’: ΰΈ™ΰΈ²ΰΈ‡ΰΈͺΰΈ²ΰΈ§ΰΈ‘ΰΈ“ΰΈ΅ รักเงิน ΰΈ­ΰΈ²ΰΈ’ΰΈΈ 32 ΰΈ›ΰΈ΅
    ΰΉ€ΰΈ›ΰΉ‡ΰΈ™ΰΉ€ΰΈ₯ΰΈ‚ΰΈ²ΰΈ™ΰΈΈΰΈΰΈ²ΰΈ£ΰΈ‚ΰΈ­ΰΈ‡ΰΈœΰΈΉΰΉ‰ΰΉ€ΰΈͺΰΈ΅ΰΈ’ΰΈŠΰΈ΅ΰΈ§ΰΈ΄ΰΈ•

    ΰΈ«ΰΈ₯ักฐาน: พบΰΈͺΰΈ²ΰΈ£ΰΈžΰΈ΄ΰΈ©ΰΉƒΰΈ™ΰΉΰΈΰΉ‰ΰΈ§ΰΈ™ΰΉ‰ΰΈ³
    ΰΉ€ΰΈ‡ΰΈ΄ΰΈ™ΰΈˆΰΈ³ΰΈ™ΰΈ§ΰΈ™ 500,000 ΰΈšΰΈ²ΰΈ— ΰΈ«ΰΈ²ΰΈ’ΰΉ„ΰΈ›ΰΈˆΰΈ²ΰΈΰΈ•ΰΈΉΰΉ‰ΰΉ€ΰΈ‹ΰΈŸ
    กΰΈ₯ΰΉ‰ΰΈ­ΰΈ‡ΰΈ§ΰΈ‡ΰΈˆΰΈ£ΰΈ›ΰΈ΄ΰΈ”ΰΈšΰΈ±ΰΈ™ΰΈ—ΰΈΆΰΈΰΉ€ΰΈ«ΰΈ•ΰΈΈΰΈΰΈ²ΰΈ£ΰΈ“ΰΉŒΰΉ„ΰΈ”ΰΉ‰

    ΰΈ•ΰΈ³ΰΈ£ΰΈ§ΰΈˆΰΈͺΰΈ–ΰΈ²ΰΈ™ΰΈ΅ΰΈ—ΰΈ­ΰΈ‡ΰΈ«ΰΈ₯ΰΉˆΰΈ­ΰΈ—ΰΈ³ΰΈΰΈ²ΰΈ£ΰΈͺืบΰΈͺΰΈ§ΰΈ™
    ΰΈžΰΈšΰΈ§ΰΉˆΰΈ²ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈͺΰΈ‡ΰΈͺΰΈ±ΰΈ’ΰΈ‘ΰΈ΅ΰΈ«ΰΈ™ΰΈ΅ΰΉ‰ΰΈͺΰΈ΄ΰΈ™ΰΈˆΰΈ³ΰΈ™ΰΈ§ΰΈ™ΰΈ‘ΰΈ²ΰΈ
    """,

    "english_business": """
    Corporate Investigation Report - Tech Acquisition

    On October 20, 2024, Microsoft Corporation announced the acquisition
    of AI startup InnovateTech for $2.5 billion USD.

    Key Personnel:
    - CEO Sarah Johnson of InnovateTech
    - VP Acquisitions David Chen at Microsoft
    - Investment banker Lisa Rodriguez from Goldman Sachs

    The deal includes:
    - 150 AI researchers and engineers
    - Proprietary machine learning algorithms
    - Patents portfolio worth $800 million
    - Office locations in San Francisco and Seattle

    The acquisition strengthens Microsoft's position in the AI market
    and provides access to advanced natural language processing technology.
    """,

    "mixed_content": """
    International Business Partnership
    ΰΈšΰΈ£ΰΈ΄ΰΈ©ΰΈ±ΰΈ— ΰΉ„ΰΈ—ΰΈ’ΰΉ€ΰΈ—ΰΈ„ ΰΈˆΰΈ³ΰΈΰΈ±ΰΈ” (ThaiTech Ltd.)

    Partnership Agreement between:
    - ThaiTech Limited (Thailand)
    - Singapore AI Solutions Pte Ltd (Singapore)
    - Tokyo Innovation Corp (Japan)

    ข้อตกΰΈ₯ΰΈ‡ΰΈ„ΰΈ§ΰΈ²ΰΈ‘ΰΈ£ΰΉˆΰΈ§ΰΈ‘ΰΈ‘ΰΈ·ΰΈ­:
    Investment: $10 million USD (approximately 350 million Thai Baht)
    Duration: 5 years (2024-2029)
    Focus: Artificial Intelligence and Machine Learning

    Key Locations:
    - Bangkok, Thailand (Head Office)
    - ΰΈͺΰΈ΄ΰΈ‡ΰΈ„ΰΉ‚ΰΈ›ΰΈ£ΰΉŒ (Singapore Regional Office)
    - Tokyo, Japan (R&D Center)

    Expected Revenue: $50 million USD by 2027
    """,
}
81
+
82
class UnifiedDemo:
    """Interactive demo for the unified AI services"""

    def __init__(self):
        # httpx.AsyncClient, created lazily in __aenter__.
        self.session = None
        # Raw analysis responses keyed by demo title, filled during the run.
        self.demo_results = {}

    async def __aenter__(self):
        self.session = httpx.AsyncClient(timeout=TIMEOUT)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Close the HTTP client if it was ever opened.
        if self.session:
            await self.session.aclose()

    def print_header(self, title: str):
        """Print formatted header"""
        rule = "=" * 70
        print("\n" + rule)
        print(f"  {title}")
        print(rule)

    def print_section(self, title: str):
        """Print section header"""
        print(f"\nπŸ“‹ {title}")
        print("-" * 50)
107
+
108
+ async def check_system_health(self) -> bool:
109
+ """Check if the unified system is healthy"""
110
+ try:
111
+ response = await self.session.get(f"{UNIFIED_URL}/health")
112
+
113
+ if response.status_code == 200:
114
+ data = response.json()
115
+ status = data.get("status")
116
+ services = data.get("services", [])
117
+
118
+ print(f"πŸ₯ System Health: {status}")
119
+
120
+ for service in services:
121
+ health_icon = "βœ…" if service.get("health") else "❌"
122
+ print(f" {health_icon} {service.get('name', 'unknown')}: {service.get('status', 'unknown')}")
123
+
124
+ healthy_services = [s for s in services if s.get("health")]
125
+
126
+ if len(healthy_services) >= 3: # At least 3 services should be healthy
127
+ print("βœ… System is ready for demo!")
128
+ return True
129
+ else:
130
+ print("❌ System is not ready. Please ensure all services are running.")
131
+ return False
132
+ else:
133
+ print(f"❌ Health check failed: HTTP {response.status_code}")
134
+ return False
135
+
136
+ except Exception as e:
137
+ print(f"❌ Cannot connect to unified system: {e}")
138
+ print("\nπŸ’‘ Make sure the unified application is running:")
139
+ print(" python app.py")
140
+ return False
141
+
142
+ async def demo_unified_analysis(self, text: str, title: str) -> Optional[Dict[str, Any]]:
143
+ """Demonstrate unified analysis capabilities"""
144
+ self.print_section(f"Unified Analysis: {title}")
145
+
146
+ try:
147
+ print(f"πŸ“ Analyzing text ({len(text)} characters)...")
148
+ print(f" Text preview: {text[:100]}...")
149
+
150
+ request_data = {
151
+ "text": text,
152
+ "extract_relationships": True,
153
+ "include_embeddings": False,
154
+ "include_summary": True,
155
+ "generate_graph_files": True,
156
+ "export_formats": ["neo4j", "json"],
157
+ "enable_rag_indexing": True,
158
+ "rag_title": f"Demo: {title}",
159
+ "rag_keywords": ["demo", "analysis", "test"],
160
+ "rag_metadata": {"demo": True, "category": title.lower()}
161
+ }
162
+
163
+ start_time = time.time()
164
+ response = await self.session.post(f"{UNIFIED_URL}/analyze/unified", json=request_data)
165
+ processing_time = time.time() - start_time
166
+
167
+ if response.status_code == 200:
168
+ data = response.json()
169
+
170
+ if data.get("success"):
171
+ service_calls = data.get("service_calls", [])
172
+ ner_analysis = data.get("ner_analysis", {})
173
+ rag_document = data.get("rag_document", {})
174
+
175
+ print(f"βœ… Analysis completed in {processing_time:.2f} seconds")
176
+ print(f"πŸ“ž Service calls: {', '.join(service_calls)}")
177
+
178
+ # NER Results
179
+ if ner_analysis:
180
+ entities = ner_analysis.get("entities", [])
181
+ relationships = ner_analysis.get("relationships", [])
182
+ language = ner_analysis.get("language", "unknown")
183
+
184
+ print(f"\n🏷️ NER Analysis Results:")
185
+ print(f" Language detected: {language}")
186
+ print(f" Entities found: {len(entities)}")
187
+ print(f" Relationships found: {len(relationships)}")
188
+
189
+ # Show top entities by type
190
+ entity_types = {}
191
+ for entity in entities:
192
+ entity_type = entity.get("label", "UNKNOWN")
193
+ if entity_type not in entity_types:
194
+ entity_types[entity_type] = []
195
+ entity_types[entity_type].append(entity.get("text", ""))
196
+
197
+ print(f"\n πŸ“Š Entity breakdown:")
198
+ for entity_type, entity_list in sorted(entity_types.items()):
199
+ print(f" {entity_type}: {len(entity_list)} entities")
200
+ # Show a few examples
201
+ examples = entity_list[:3]
202
+ if examples:
203
+ print(f" Examples: {', '.join(examples)}")
204
+
205
+ # Show relationships
206
+ if relationships:
207
+ print(f"\n πŸ”— Relationship examples:")
208
+ for rel in relationships[:3]:
209
+ source = rel.get("source_entity", "Unknown")
210
+ target = rel.get("target_entity", "Unknown")
211
+ rel_type = rel.get("relationship_type", "unknown")
212
+ confidence = rel.get("confidence", 0)
213
+ print(f" {source} β†’ {target} ({rel_type}, {confidence:.2f})")
214
+ else:
215
+ print(f" ⚠️ No relationships found")
216
+
217
+ # RAG Results
218
+ if rag_document:
219
+ print(f"\nπŸ’Ύ RAG Indexing Results:")
220
+ print(f" Document ID: {rag_document.get('document_id', 'N/A')}")
221
+ print(f" Total chunks: {rag_document.get('total_chunks', 0)}")
222
+ print(f" Status: Document indexed for search")
223
+ else:
224
+ print(f"\n⚠️ RAG indexing was not performed")
225
+
226
+ # Store results for later use
227
+ self.demo_results[title] = data
228
+ return data
229
+ else:
230
+ print(f"❌ Analysis failed: {data.get('error', 'Unknown error')}")
231
+ return None
232
+ else:
233
+ print(f"❌ Request failed: HTTP {response.status_code}")
234
+ print(f" Response: {response.text[:200]}")
235
+ return None
236
+
237
+ except Exception as e:
238
+ print(f"❌ Analysis error: {e}")
239
+ return None
240
+
241
+ async def demo_combined_search(self):
242
+ """Demonstrate combined search capabilities"""
243
+ self.print_section("Combined Search with NER Enhancement")
244
+
245
+ search_queries = [
246
+ "murder investigation Thailand",
247
+ "Microsoft acquisition business",
248
+ "artificial intelligence partnership"
249
+ ]
250
+
251
+ for query in search_queries:
252
+ try:
253
+ print(f"\nπŸ” Searching for: '{query}'")
254
+
255
+ request_data = {
256
+ "query": query,
257
+ "limit": 3,
258
+ "similarity_threshold": 0.1,
259
+ "include_ner_analysis": True,
260
+ "ner_export_formats": ["json"]
261
+ }
262
+
263
+ start_time = time.time()
264
+ response = await self.session.post(f"{UNIFIED_URL}/search/combined", json=request_data)
265
+ search_time = time.time() - start_time
266
+
267
+ if response.status_code == 200:
268
+ data = response.json()
269
+
270
+ if data.get("success"):
271
+ search_results = data.get("search_results", {})
272
+ results = search_results.get("results", [])
273
+ ner_analyses = search_results.get("ner_analyses", [])
274
+
275
+ print(f" βœ… Search completed in {search_time:.2f} seconds")
276
+ print(f" πŸ“Š Found {len(results)} results")
277
+
278
+ for i, result in enumerate(results):
279
+ chunk = result.get("chunk", {})
280
+ similarity = result.get("similarity_score", 0)
281
+ doc_info = result.get("document_info", {})
282
+
283
+ print(f"\n πŸ“„ Result {i+1} (similarity: {similarity:.3f}):")
284
+ print(f" Title: {doc_info.get('title', 'Untitled')}")
285
+ print(f" Content: {chunk.get('content', '')[:100]}...")
286
+
287
+ if ner_analyses:
288
+ print(f"\n 🏷️ NER analysis performed on top {len(ner_analyses)} results")
289
+ for ner_data in ner_analyses:
290
+ ner_result = ner_data.get("ner_analysis", {})
291
+ if ner_result.get("success"):
292
+ entities = ner_result.get("entities", [])
293
+ relationships = ner_result.get("relationships", [])
294
+ print(f" Result {ner_data.get('result_index', 0)}: {len(entities)} entities, {len(relationships)} relationships")
295
+
296
+ else:
297
+ print(f" ❌ Search failed: {data.get('error', 'Unknown error')}")
298
+ else:
299
+ print(f" ❌ Search failed: HTTP {response.status_code}")
300
+
301
+ except Exception as e:
302
+ print(f" ❌ Search error: {e}")
303
+
304
+ async def demo_service_proxies(self):
305
+ """Demonstrate service proxy functionality"""
306
+ self.print_section("Service Proxy Demonstration")
307
+
308
+ # Test NER proxy
309
+ try:
310
+ print("πŸ§ͺ Testing NER service proxy...")
311
+
312
+ test_data = {
313
+ "text": "Quick test: Apple Inc. CEO Tim Cook visited Tokyo, Japan.",
314
+ "extract_relationships": True,
315
+ "include_embeddings": False,
316
+ "generate_graph_files": False
317
+ }
318
+
319
+ response = await self.session.post(f"{UNIFIED_URL}/ner/analyze/text", json=test_data)
320
+
321
+ if response.status_code == 200:
322
+ result = response.json()
323
+ if result.get("success"):
324
+ entities = result.get("entities", [])
325
+ print(f" βœ… NER proxy working: found {len(entities)} entities")
326
+ else:
327
+ print(f" ❌ NER proxy failed: {result.get('error', 'Unknown error')}")
328
+ else:
329
+ print(f" ❌ NER proxy failed: HTTP {response.status_code}")
330
+
331
+ except Exception as e:
332
+ print(f" ❌ NER proxy error: {e}")
333
+
334
+ # Test RAG proxy
335
+ try:
336
+ print("πŸ§ͺ Testing RAG service proxy...")
337
+
338
+ response = await self.session.get(f"{UNIFIED_URL}/rag/documents?limit=3")
339
+
340
+ if response.status_code == 200:
341
+ result = response.json()
342
+ documents = result.get("documents", [])
343
+ print(f" βœ… RAG proxy working: found {len(documents)} documents")
344
+ else:
345
+ print(f" ❌ RAG proxy failed: HTTP {response.status_code}")
346
+
347
+ except Exception as e:
348
+ print(f" ❌ RAG proxy error: {e}")
349
+
350
+ # Test OCR proxy
351
+ try:
352
+ print("πŸ§ͺ Testing OCR service proxy...")
353
+
354
+ response = await self.session.get(f"{UNIFIED_URL}/ocr/health")
355
+
356
+ if response.status_code == 200:
357
+ print(f" βœ… OCR proxy working: health check passed")
358
+ else:
359
+ print(f" ❌ OCR proxy failed: HTTP {response.status_code}")
360
+
361
+ except Exception as e:
362
+ print(f" ❌ OCR proxy error: {e}")
363
+
364
+ async def demo_service_discovery(self):
365
+ """Demonstrate service discovery"""
366
+ self.print_section("Service Discovery")
367
+
368
+ try:
369
+ response = await self.session.get(f"{UNIFIED_URL}/services")
370
+
371
+ if response.status_code == 200:
372
+ data = response.json()
373
+ services = data.get("services", {})
374
+ unified = data.get("unified", {})
375
+
376
+ print(f"πŸ” Service discovery successful:")
377
+ print(f" Unified endpoint: {unified.get('url', 'N/A')}")
378
+
379
+ for service_name, service_info in services.items():
380
+ endpoints = service_info.get("endpoints", [])
381
+ description = service_info.get("description", "No description")
382
+ url = service_info.get("url", "N/A")
383
+
384
+ print(f"\n πŸ“‘ {service_name.upper()} Service:")
385
+ print(f" URL: {url}")
386
+ print(f" Description: {description}")
387
+ print(f" Endpoints: {len(endpoints)} available")
388
+
389
+ # Show a few example endpoints
390
+ for endpoint in endpoints[:3]:
391
+ print(f" β€’ {endpoint}")
392
+ if len(endpoints) > 3:
393
+ print(f" β€’ ... and {len(endpoints) - 3} more")
394
+ else:
395
+ print(f"❌ Service discovery failed: HTTP {response.status_code}")
396
+
397
+ except Exception as e:
398
+ print(f"❌ Service discovery error: {e}")
399
+
400
+ def print_demo_summary(self):
401
+ """Print summary of demo results"""
402
+ self.print_section("Demo Summary")
403
+
404
+ if not self.demo_results:
405
+ print("No analysis results to summarize.")
406
+ return
407
+
408
+ total_entities = 0
409
+ total_relationships = 0
410
+ languages_detected = set()
411
+
412
+ for title, data in self.demo_results.items():
413
+ ner_analysis = data.get("ner_analysis", {})
414
+ if ner_analysis:
415
+ entities = ner_analysis.get("entities", [])
416
+ relationships = ner_analysis.get("relationships", [])
417
+ language = ner_analysis.get("language", "unknown")
418
+
419
+ total_entities += len(entities)
420
+ total_relationships += len(relationships)
421
+ languages_detected.add(language)
422
+
423
+ print(f"πŸ“Š {title}:")
424
+ print(f" Language: {language}")
425
+ print(f" Entities: {len(entities)}")
426
+ print(f" Relationships: {len(relationships)}")
427
+
428
+ print(f"\n🎯 Overall Demo Statistics:")
429
+ print(f" Total analyses: {len(self.demo_results)}")
430
+ print(f" Total entities extracted: {total_entities}")
431
+ print(f" Total relationships found: {total_relationships}")
432
+ print(f" Languages detected: {', '.join(languages_detected)}")
433
+
434
+ print(f"\n✨ Capabilities Demonstrated:")
435
+ print(f" βœ… Multi-language NER analysis (Thai + English)")
436
+ print(f" βœ… Relationship extraction and mapping")
437
+ print(f" βœ… RAG document indexing")
438
+ print(f" βœ… Combined search with NER enhancement")
439
+ print(f" βœ… Service proxy functionality")
440
+ print(f" βœ… Unified workflow coordination")
441
+ print(f" βœ… Real-time processing and analysis")
442
+
443
+ async def run_interactive_demo(self):
444
+ """Run the complete interactive demo"""
445
+ self.print_header("Unified AI Services - Interactive Demo")
446
+
447
+ print("This demo will showcase the capabilities of the unified AI system:")
448
+ print("β€’ Multi-language NER analysis with relationship extraction")
449
+ print("β€’ RAG document indexing and vector search")
450
+ print("β€’ Combined workflows and service coordination")
451
+ print("β€’ Service proxy functionality")
452
+ print("β€’ Real-time health monitoring")
453
+
454
+ # Check system health
455
+ print("\nπŸ” Checking system health...")
456
+ if not await self.check_system_health():
457
+ print("\n❌ Demo cannot proceed - system is not healthy")
458
+ return False
459
+
460
+ # Demo 1: Unified Analysis
461
+ self.print_header("Demo 1: Unified Analysis Capabilities")
462
+
463
+ for title, text in DEMO_TEXTS.items():
464
+ await self.demo_unified_analysis(text, title.replace("_", " ").title())
465
+ # Small delay between analyses
466
+ await asyncio.sleep(1)
467
+
468
+ # Demo 2: Combined Search
469
+ self.print_header("Demo 2: Combined Search with NER Enhancement")
470
+ await self.demo_combined_search()
471
+
472
+ # Demo 3: Service Proxies
473
+ self.print_header("Demo 3: Service Proxy Functionality")
474
+ await self.demo_service_proxies()
475
+
476
+ # Demo 4: Service Discovery
477
+ self.print_header("Demo 4: Service Discovery")
478
+ await self.demo_service_discovery()
479
+
480
+ # Summary
481
+ self.print_header("Demo Complete")
482
+ self.print_demo_summary()
483
+
484
+ print(f"\nπŸŽ‰ Demo completed successfully!")
485
+ print(f"πŸ“– For more information, visit: http://localhost:8000/docs")
486
+
487
+ return True
488
+
489
async def main():
    """Entry point: parse the optional target URL and run the demo.

    An optional first CLI argument overrides the module-level UNIFIED_URL.
    """
    # Declared at the top of the function: the original placed this
    # `global` statement after a local assignment mid-function, which is
    # fragile — any later reference to UNIFIED_URL above the declaration
    # would become a SyntaxError.
    global UNIFIED_URL

    print("🎬 Unified AI Services - Interactive Demo")
    print("=" * 50)

    # e.g. `python demo.py http://other-host:8000`
    if len(sys.argv) > 1:
        UNIFIED_URL = sys.argv[1]

    print(f"🎯 Demo target: {UNIFIED_URL}")
    print("\nMake sure the unified application is running:")
    print("   python app.py")

    # Wait for user confirmation before issuing requests.
    try:
        input("\nPress Enter to start the demo (or Ctrl+C to cancel)...")
    except KeyboardInterrupt:
        print("\nDemo cancelled.")
        return

    async with UnifiedDemo() as demo:
        success = await demo.run_interactive_demo()

        if success:
            print(f"\nπŸ† Demo completed successfully!")
            print(f"The unified AI services are working perfectly.")
        else:
            print(f"\n⚠️ Demo encountered some issues.")
            print(f"Please check the system health and try again.")
519
+
520
if __name__ == "__main__":
    # Run the async demo; exit non-zero on unexpected failure, but treat
    # Ctrl+C as a normal, clean interruption.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n\nπŸ›‘ Demo interrupted by user")
    except Exception as e:
        print(f"\n❌ Demo failed: {e}")
        sys.exit(1)
gettingstart.md ADDED
@@ -0,0 +1,485 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Getting Started with Unified AI Services
2
+
3
+ This guide will walk you through setting up and running the complete Unified AI Services system.
4
+
5
+ ## πŸ“‹ Quick Overview
6
+
7
+ The Unified AI Services system consists of:
8
+ - **NER Service** (Port 8500): Named Entity Recognition with relationship extraction
9
+ - **OCR Service** (Port 8400): Optical Character Recognition with document processing
10
+ - **RAG Service** (Port 8401): Retrieval-Augmented Generation with vector search
11
+ - **Unified App** (Port 8000): Main application coordinating all services
12
+
13
+ ## πŸš€ Quick Start (Recommended)
14
+
15
+ ### Step 1: Automated Setup
16
+
17
+ ```bash
18
+ # Run the automated setup wizard
19
+ python setup.py
20
+ ```
21
+
22
+ This will:
23
+ - βœ… Check your Python environment
24
+ - βœ… Create necessary directories
25
+ - βœ… Help configure your .env file
26
+ - βœ… Install dependencies
27
+ - βœ… Validate configuration
28
+ - βœ… Create startup scripts
29
+
30
+ ### Step 2: Start the System
31
+
32
+ ```bash
33
+ # Start all services automatically
34
+ python app.py
35
+ ```
36
+
37
+ Or use the generated scripts:
38
+ - **Windows**: Double-click `start_services.bat`
39
+ - **Linux/Mac**: Run `./start_services.sh`
40
+
41
+ ### Step 3: Test the System
42
+
43
+ ```bash
44
+ # Run comprehensive tests
45
+ python test.py
46
+ ```
47
+
48
+ Or use the generated scripts:
49
+ - **Windows**: Double-click `run_tests.bat`
50
+ - **Linux/Mac**: Run `./run_tests.sh`
51
+
52
+ ### Step 4: Try the Demo
53
+
54
+ ```bash
55
+ # Run interactive demo
56
+ python demo.py
57
+ ```
58
+
59
+ ## πŸ“ File Structure
60
+
61
+ After setup, your directory should look like this:
62
+
63
+ ```
64
+ unified-ai-services/
65
+ β”œβ”€β”€ app.py # 🌐 Main unified application
66
+ β”œβ”€β”€ configs.py # βš™οΈ Configuration management
67
+ β”œβ”€β”€ setup.py # πŸ› οΈ Automated setup script
68
+ β”œβ”€β”€ manage_services.py # πŸ”§ Service management tool
69
+ β”œβ”€β”€ test.py # πŸ§ͺ Comprehensive test suite
70
+ β”œβ”€β”€ demo.py # 🎬 Interactive demo
71
+ β”œβ”€β”€ requirements.txt # πŸ“¦ Python dependencies
72
+ β”œβ”€β”€ .env # πŸ” Environment configuration
73
+ β”œβ”€β”€ README.md # πŸ“– Documentation
74
+ β”œβ”€β”€ gettingstart.md # πŸš€ This file
75
+ β”œβ”€β”€ services/ # πŸ“‚ Service implementations
76
+ β”‚ β”œβ”€β”€ ner_service.py # Named Entity Recognition
77
+ β”‚ β”œβ”€β”€ ocr_service.py # Optical Character Recognition
78
+ β”‚ └── rag_service.py # Retrieval-Augmented Generation
79
+ β”œβ”€β”€ exports/ # πŸ“ Generated export files
80
+ β”œβ”€β”€ logs/ # πŸ“ Application logs
81
+ └── temp/ # πŸ—‚οΈ Temporary files
82
+ ```
83
+
84
+ ## βš™οΈ Manual Setup (Alternative)
85
+
86
+ If you prefer manual setup:
87
+
88
+ ### Prerequisites
89
+ - Python 3.8 or higher
90
+ - PostgreSQL with vector extension
91
+ - Azure OpenAI account
92
+ - Azure Document Intelligence account
93
+ - DeepSeek API account
94
+
95
+ ### 1. Install Dependencies
96
+
97
+ ```bash
98
+ pip install -r requirements.txt
99
+ ```
100
+
101
+ ### 2. Configure Environment
102
+
103
+ Create a `.env` file with your configuration:
104
+
105
+ ```bash
106
+ # Server Configuration
107
+ HOST=0.0.0.0
108
+ MAIN_PORT=8000
109
+ NER_PORT=8500
110
+ OCR_PORT=8400
111
+ RAG_PORT=8401
112
+
113
+ # PostgreSQL Configuration
114
+ POSTGRES_HOST=your-postgres-server.com
115
+ POSTGRES_PORT=5432
116
+ POSTGRES_USER=your-username
117
+ POSTGRES_PASSWORD=your-password
118
+ POSTGRES_DATABASE=postgres
119
+
120
+ # Azure OpenAI Configuration
121
+ AZURE_OPENAI_ENDPOINT=https://your-openai.openai.azure.com/
122
+ AZURE_OPENAI_API_KEY=your-api-key
123
+ EMBEDDING_MODEL=text-embedding-3-large
124
+
125
+ # DeepSeek Configuration (for advanced NER)
126
+ DEEPSEEK_ENDPOINT=https://your-deepseek-endpoint/
127
+ DEEPSEEK_API_KEY=your-deepseek-key
128
+ DEEPSEEK_MODEL=DeepSeek-R1-0528
129
+
130
+ # Azure Document Intelligence Configuration
131
+ AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://your-di.cognitiveservices.azure.com/
132
+ AZURE_DOCUMENT_INTELLIGENCE_KEY=your-di-key
133
+
134
+ # Azure Storage Configuration
135
+ AZURE_STORAGE_ACCOUNT_URL=https://yourstorage.blob.core.windows.net/
136
+ AZURE_BLOB_SAS_TOKEN=your-sas-token
137
+ BLOB_CONTAINER=historylog
138
+ ```
139
+
140
+ ### 3. Create Directory Structure
141
+
142
+ ```bash
143
+ mkdir -p services exports logs temp tests data
144
+ ```
145
+
146
+ ### 4. Place Service Files
147
+
148
+ Ensure your service files are in the correct locations:
149
+ - `services/ner_service.py`
150
+ - `services/ocr_service.py`
151
+ - `services/rag_service.py`
152
+
153
+ ## πŸ”§ Service Management
154
+
155
+ ### Using the Service Manager
156
+
157
+ The `manage_services.py` script provides easy service management:
158
+
159
+ ```bash
160
+ # Start individual services
161
+ python manage_services.py start ner
162
+ python manage_services.py start ocr
163
+ python manage_services.py start rag
164
+ python manage_services.py start unified
165
+
166
+ # Start all services
167
+ python manage_services.py start all
168
+
169
+ # Check status
170
+ python manage_services.py status
171
+
172
+ # Test services
173
+ python manage_services.py test ner
174
+ python manage_services.py test all
175
+
176
+ # Stop services
177
+ python manage_services.py stop all
178
+
179
+ # Restart services
180
+ python manage_services.py restart all
181
+
182
+ # List available services
183
+ python manage_services.py list
184
+ ```
185
+
186
+ ### Direct Service Management
187
+
188
+ Start services individually for development:
189
+
190
+ ```bash
191
+ # Terminal 1: Start OCR service
192
+ cd services && python ocr_service.py
193
+
194
+ # Terminal 2: Start RAG service
195
+ cd services && python rag_service.py
196
+
197
+ # Terminal 3: Start NER service
198
+ cd services && python ner_service.py
199
+
200
+ # Terminal 4: Start unified application
201
+ python app.py
202
+ ```
203
+
204
+ ## πŸ§ͺ Testing and Validation
205
+
206
+ ### Comprehensive System Tests
207
+
208
+ ```bash
209
+ # Run all tests
210
+ python test.py
211
+
212
+ # Test output will show:
213
+ # βœ… Unified App Health Check
214
+ # βœ… Individual Service Health
215
+ # βœ… Unified Analysis (Text)
216
+ # βœ… Unified Analysis (URL)
217
+ # βœ… Combined Search
218
+ # βœ… Service Proxies
219
+ # βœ… File Upload (Unified)
220
+ # βœ… Service Discovery
221
+ # βœ… System Performance
222
+ # βœ… Error Handling
223
+ ```
224
+
225
+ ### Individual Service Tests
226
+
227
+ ```bash
228
+ # Test NER service specifically
229
+ python test_ner.py
230
+
231
+ # Test RAG service specifically
232
+ python test_rag.py
233
+ ```
234
+
235
+ ### Quick Health Checks
236
+
237
+ ```bash
238
+ # Check unified system
239
+ curl http://localhost:8000/health
240
+
241
+ # Check individual services
242
+ curl http://localhost:8500/health # NER
243
+ curl http://localhost:8400/health # OCR
244
+ curl http://localhost:8401/health # RAG
245
+ ```
246
+
247
+ ## 🎬 Interactive Demo
248
+
249
+ The demo script showcases all system capabilities:
250
+
251
+ ```bash
252
+ python demo.py
253
+ ```
254
+
255
+ Demo includes:
256
+ - Multi-language text analysis (Thai + English)
257
+ - Entity and relationship extraction
258
+ - RAG document indexing
259
+ - Combined search functionality
260
+ - Service proxy testing
261
+ - Real-time performance monitoring
262
+
263
+ ## 🌐 API Usage
264
+
265
+ ### API Documentation
266
+
267
+ Once running, access interactive documentation:
268
+ - **Unified API**: http://localhost:8000/docs
269
+ - **NER Service**: http://localhost:8500/docs
270
+ - **OCR Service**: http://localhost:8400/docs
271
+ - **RAG Service**: http://localhost:8401/docs
272
+
273
+ ### Key Endpoints
274
+
275
+ #### Unified Analysis
276
+ ```python
277
+ # Analyze text with automatic RAG indexing
278
+ POST /analyze/unified
279
+ {
280
+ "text": "Your text here...",
281
+ "extract_relationships": true,
282
+ "enable_rag_indexing": true,
283
+ "rag_title": "Document Title"
284
+ }
285
+ ```
286
+
287
+ #### Combined Search
288
+ ```python
289
+ # Search with automatic NER enhancement
290
+ POST /search/combined
291
+ {
292
+ "query": "search terms",
293
+ "include_ner_analysis": true,
294
+ "limit": 10
295
+ }
296
+ ```
297
+
298
+ #### Service Proxies
299
+ ```python
300
+ # Direct access to individual services
301
+ POST /ner/analyze/text # NER analysis
302
+ POST /ocr/upload # OCR processing
303
+ POST /rag/search # RAG search
304
+ GET /rag/documents # List documents
305
+ ```
306
+
307
+ ## πŸ” Health Monitoring
308
+
309
+ ### System Status
310
+
311
+ ```bash
312
+ # Get overall system health
313
+ GET /health
314
+
315
+ # Get detailed status
316
+ GET /status
317
+
318
+ # Discover available services
319
+ GET /services
320
+ ```
321
+
322
+ ### Service Monitoring
323
+
324
+ Each service provides health information:
325
+ - Response times
326
+ - Uptime
327
+ - Resource usage
328
+ - Configuration status
329
+ - Error rates
330
+
331
+ ## πŸ› οΈ Troubleshooting
332
+
333
+ ### Common Issues
334
+
335
+ #### 1. Services Won't Start
336
+
337
+ **Check ports:**
338
+ ```bash
339
+ netstat -an | grep :8000
340
+ netstat -an | grep :8500
341
+ netstat -an | grep :8400
342
+ netstat -an | grep :8401
343
+ ```
344
+
345
+ **Verify configuration:**
346
+ ```bash
347
+ python configs.py
348
+ ```
349
+
350
+ **Check dependencies:**
351
+ ```bash
352
+ pip list | grep fastapi
353
+ pip list | grep asyncpg
354
+ ```
355
+
356
+ #### 2. Database Connection Issues
357
+
358
+ **Test connection:**
359
+ ```bash
360
+ # Use your actual connection details
361
+ python -c "
362
+ import asyncio
363
+ import asyncpg
364
+
365
+ async def test():
366
+ conn = await asyncpg.connect('postgresql://user:pass@host:5432/db')
367
+ print('Connected successfully')
368
+ await conn.close()
369
+
370
+ asyncio.run(test())
371
+ "
372
+ ```
373
+
374
+ **Common fixes:**
375
+ - Verify PostgreSQL is running
376
+ - Check firewall rules
377
+ - Confirm SSL requirements
378
+ - Validate credentials
379
+
380
+ #### 3. Azure Service Issues
381
+
382
+ **Check API keys:**
383
+ ```bash
384
+ # Test Azure OpenAI
385
+ curl -H "api-key: YOUR_KEY" "YOUR_ENDPOINT/openai/deployments/YOUR_MODEL/embeddings?api-version=2024-02-01"
386
+
387
+ # Test Document Intelligence
388
+ curl -H "Ocp-Apim-Subscription-Key: YOUR_KEY" "YOUR_ENDPOINT/formrecognizer/info?api-version=2023-07-31"
389
+ ```
390
+
391
+ **Common fixes:**
392
+ - Verify API keys are correct
393
+ - Check service regions
394
+ - Confirm quota limits
395
+ - Validate endpoint URLs
396
+
397
+ #### 4. Performance Issues
398
+
399
+ **Monitor resources:**
400
+ ```bash
401
+ # Check system resources
402
+ top
403
+ htop
404
+ python manage_services.py status
405
+ ```
406
+
407
+ **Common solutions:**
408
+ - Increase system memory
409
+ - Optimize database queries
410
+ - Reduce concurrent requests
411
+ - Check network latency
412
+
413
+ ### Getting Help
414
+
415
+ 1. **Check logs**: Services log to console
416
+ 2. **Run health checks**: Use `/health` endpoints
417
+ 3. **Validate configuration**: Run `python configs.py`
418
+ 4. **Test individual services**: Use service manager
419
+ 5. **Check database connectivity**: Test connection strings
420
+ 6. **Verify Azure services**: Check API endpoints
421
+
422
+ ### Debug Mode
423
+
424
+ Enable debug mode for detailed logging:
425
+
426
+ ```bash
427
+ # In .env file
428
+ DEBUG=True
429
+
430
+ # Or set environment variable
431
+ export DEBUG=true
432
+ python app.py
433
+ ```
434
+
435
+ ## πŸš€ Production Deployment
436
+
437
+ ### Security Considerations
438
+
439
+ 1. **Environment Variables**: Use secure secret management
440
+ 2. **HTTPS**: Enable SSL/TLS in production
441
+ 3. **Authentication**: Implement API authentication
442
+ 4. **Rate Limiting**: Add request rate limiting
443
+ 5. **Input Validation**: Validate all input data
444
+
445
+ ### Performance Optimization
446
+
447
+ 1. **Caching**: Implement Redis caching
448
+ 2. **Load Balancing**: Use reverse proxy (nginx)
449
+ 3. **Database**: Optimize PostgreSQL configuration
450
+ 4. **Monitoring**: Set up application monitoring
451
+ 5. **Scaling**: Consider horizontal scaling
452
+
453
+ ### Deployment Options
454
+
455
+ 1. **Docker**: Containerize services
456
+ 2. **Cloud**: Deploy to Azure/AWS/GCP
457
+ 3. **Kubernetes**: Orchestrate with k8s
458
+ 4. **CI/CD**: Automate deployments
459
+
460
+ ## πŸ“ž Next Steps
461
+
462
+ After successful setup:
463
+
464
+ 1. **Explore the API**: Use the interactive documentation
465
+ 2. **Try the demo**: Run `python demo.py`
466
+ 3. **Run tests**: Execute `python test.py`
467
+ 4. **Monitor system**: Check health endpoints
468
+ 5. **Customize**: Modify services for your needs
469
+ 6. **Scale**: Consider production deployment
470
+
471
+ ## 🎯 Success Indicators
472
+
473
+ You know the system is working when:
474
+ - βœ… All health checks pass
475
+ - βœ… Tests complete successfully
476
+ - βœ… Demo runs without errors
477
+ - βœ… API documentation is accessible
478
+ - βœ… Services respond to requests
479
+ - βœ… Database connections work
480
+ - βœ… Azure integrations function
481
+ - βœ… File uploads process correctly
482
+ - βœ… Search returns results
483
+ - βœ… Export files generate properly
484
+
485
+ **Congratulations! Your Unified AI Services system is ready to use! πŸŽ‰**
manage_services.py ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Service Management Tool for Unified AI Services
4
+ Helps start, stop, monitor, and troubleshoot individual services
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import time
10
+ import signal
11
+ import subprocess
12
+ import asyncio
13
+ import json
14
+ from pathlib import Path
15
+ from typing import Dict, List, Optional, Tuple
16
+ import argparse
17
+
18
+ import httpx
19
+ import psutil
20
+
21
+ # Import configuration if available
22
+ try:
23
+ from configs import get_config, validate_environment
24
+ config = get_config()
25
+ except ImportError:
26
+ print("⚠️ Could not import configs. Using default values.")
27
+ config = None
28
+
29
+ class ServiceManager:
30
+ """Manages individual services for development and troubleshooting"""
31
+
32
+ def __init__(self):
33
+ self.processes: Dict[str, subprocess.Popen] = {}
34
+ self.service_configs = {
35
+ "ner": {
36
+ "script": "services/ner_service.py",
37
+ "port": 8500,
38
+ "description": "Named Entity Recognition with relationship extraction"
39
+ },
40
+ "ocr": {
41
+ "script": "services/ocr_service.py",
42
+ "port": 8400,
43
+ "description": "Optical Character Recognition with document processing"
44
+ },
45
+ "rag": {
46
+ "script": "services/rag_service.py",
47
+ "port": 8401,
48
+ "description": "Retrieval-Augmented Generation with vector search"
49
+ },
50
+ "unified": {
51
+ "script": "app.py",
52
+ "port": 8000,
53
+ "description": "Unified application coordinating all services"
54
+ }
55
+ }
56
+
57
+ # Update ports from config if available
58
+ if config:
59
+ self.service_configs["ner"]["port"] = config.ner.PORT
60
+ self.service_configs["ocr"]["port"] = config.ocr.PORT
61
+ self.service_configs["rag"]["port"] = config.rag.PORT
62
+ self.service_configs["unified"]["port"] = config.MAIN_PORT
63
+
64
+ def print_header(self, title: str):
65
+ """Print formatted header"""
66
+ print("\n" + "=" * 60)
67
+ print(f" {title}")
68
+ print("=" * 60)
69
+
70
+ def print_service_info(self, service_name: str):
71
+ """Print service information"""
72
+ if service_name not in self.service_configs:
73
+ return
74
+
75
+ service = self.service_configs[service_name]
76
+ print(f"πŸ“ {service_name.upper()} Service")
77
+ print(f" Description: {service['description']}")
78
+ print(f" Script: {service['script']}")
79
+ print(f" Port: {service['port']}")
80
+ print(f" URL: http://localhost:{service['port']}")
81
+
82
+ def is_port_in_use(self, port: int) -> bool:
83
+ """Check if port is in use"""
84
+ try:
85
+ for conn in psutil.net_connections():
86
+ if conn.laddr.port == port:
87
+ return True
88
+ return False
89
+ except:
90
+ return False
91
+
92
+ async def check_service_health(self, service_name: str) -> Tuple[bool, Optional[Dict]]:
93
+ """Check service health"""
94
+ if service_name not in self.service_configs:
95
+ return False, None
96
+
97
+ port = self.service_configs[service_name]["port"]
98
+
99
+ try:
100
+ async with httpx.AsyncClient() as client:
101
+ response = await client.get(
102
+ f"http://localhost:{port}/health",
103
+ timeout=5.0
104
+ )
105
+ if response.status_code == 200:
106
+ return True, response.json()
107
+ else:
108
+ return False, {"error": f"HTTP {response.status_code}"}
109
+ except Exception as e:
110
+ return False, {"error": str(e)}
111
+
112
+ def start_service(self, service_name: str) -> bool:
113
+ """Start a specific service"""
114
+ if service_name not in self.service_configs:
115
+ print(f"❌ Unknown service: {service_name}")
116
+ return False
117
+
118
+ service = self.service_configs[service_name]
119
+ script_path = service["script"]
120
+ port = service["port"]
121
+
122
+ # Check if script exists
123
+ if not Path(script_path).exists():
124
+ print(f"❌ Service script not found: {script_path}")
125
+ return False
126
+
127
+ # Check if port is already in use
128
+ if self.is_port_in_use(port):
129
+ print(f"⚠️ Port {port} is already in use. Service may already be running.")
130
+ return False
131
+
132
+ # Check if service is already running in our process list
133
+ if service_name in self.processes:
134
+ process = self.processes[service_name]
135
+ if process.poll() is None: # Process is still running
136
+ print(f"⚠️ {service_name} service is already running (PID: {process.pid})")
137
+ return False
138
+
139
+ try:
140
+ print(f"πŸš€ Starting {service_name} service...")
141
+ print(f" Script: {script_path}")
142
+ print(f" Port: {port}")
143
+
144
+ # Start the service
145
+ if sys.platform == "win32":
146
+ process = subprocess.Popen([
147
+ sys.executable, script_path
148
+ ], creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
149
+ else:
150
+ process = subprocess.Popen([
151
+ sys.executable, script_path
152
+ ], preexec_fn=os.setsid)
153
+
154
+ self.processes[service_name] = process
155
+
156
+ # Wait a moment for startup
157
+ time.sleep(2)
158
+
159
+ # Check if process is still running
160
+ if process.poll() is None:
161
+ print(f"βœ… {service_name} service started successfully (PID: {process.pid})")
162
+ return True
163
+ else:
164
+ print(f"❌ {service_name} service failed to start")
165
+ return False
166
+
167
+ except Exception as e:
168
+ print(f"❌ Failed to start {service_name} service: {e}")
169
+ return False
170
+
171
+ def stop_service(self, service_name: str) -> bool:
172
+ """Stop a specific service"""
173
+ if service_name not in self.service_configs:
174
+ print(f"❌ Unknown service: {service_name}")
175
+ return False
176
+
177
+ port = self.service_configs[service_name]["port"]
178
+
179
+ # Try to stop our managed process first
180
+ if service_name in self.processes:
181
+ process = self.processes[service_name]
182
+ if process.poll() is None: # Process is still running
183
+ try:
184
+ print(f"πŸ›‘ Stopping {service_name} service (PID: {process.pid})...")
185
+
186
+ if sys.platform == "win32":
187
+ process.send_signal(signal.CTRL_BREAK_EVENT)
188
+ else:
189
+ os.killpg(os.getpgid(process.pid), signal.SIGTERM)
190
+
191
+ # Wait for graceful shutdown
192
+ try:
193
+ process.wait(timeout=10)
194
+ print(f"βœ… {service_name} service stopped")
195
+ del self.processes[service_name]
196
+ return True
197
+ except subprocess.TimeoutExpired:
198
+ print(f"⚠️ Force killing {service_name} service...")
199
+ process.kill()
200
+ del self.processes[service_name]
201
+ return True
202
+
203
+ except Exception as e:
204
+ print(f"❌ Error stopping {service_name} service: {e}")
205
+ return False
206
+
207
+ # Try to find and stop any process using the port
208
+ try:
209
+ for proc in psutil.process_iter(['pid', 'name', 'connections']):
210
+ try:
211
+ for conn in proc.info['connections'] or []:
212
+ if conn.laddr.port == port:
213
+ print(f"πŸ›‘ Found process using port {port} (PID: {proc.pid})")
214
+ proc.terminate()
215
+ try:
216
+ proc.wait(timeout=5)
217
+ print(f"βœ… Process {proc.pid} terminated")
218
+ return True
219
+ except psutil.TimeoutExpired:
220
+ proc.kill()
221
+ print(f"βœ… Process {proc.pid} killed")
222
+ return True
223
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
224
+ continue
225
+ except Exception as e:
226
+ print(f"❌ Error finding process on port {port}: {e}")
227
+
228
+ print(f"⚠️ No running {service_name} service found")
229
+ return False
230
+
231
+ def stop_all_services(self):
232
+ """Stop all managed services"""
233
+ print("πŸ›‘ Stopping all services...")
234
+
235
+ for service_name in self.service_configs.keys():
236
+ self.stop_service(service_name)
237
+
238
+ async def get_service_status(self, service_name: str) -> Dict:
239
+ """Get detailed service status"""
240
+ if service_name not in self.service_configs:
241
+ return {"status": "unknown", "error": "Unknown service"}
242
+
243
+ service = self.service_configs[service_name]
244
+ port = service["port"]
245
+
246
+ status = {
247
+ "name": service_name,
248
+ "description": service["description"],
249
+ "port": port,
250
+ "script": service["script"],
251
+ "managed_process": False,
252
+ "port_in_use": self.is_port_in_use(port),
253
+ "health_check": False,
254
+ "health_data": None
255
+ }
256
+
257
+ # Check if we have a managed process
258
+ if service_name in self.processes:
259
+ process = self.processes[service_name]
260
+ if process.poll() is None:
261
+ status["managed_process"] = True
262
+ status["pid"] = process.pid
263
+ try:
264
+ proc = psutil.Process(process.pid)
265
+ status["cpu_percent"] = proc.cpu_percent()
266
+ status["memory_mb"] = proc.memory_info().rss / 1024 / 1024
267
+ status["create_time"] = proc.create_time()
268
+ status["uptime"] = time.time() - proc.create_time()
269
+ except:
270
+ pass
271
+
272
+ # Check health endpoint
273
+ health_ok, health_data = await self.check_service_health(service_name)
274
+ status["health_check"] = health_ok
275
+ status["health_data"] = health_data
276
+
277
+ return status
278
+
279
+ async def status_all_services(self):
280
+ """Show status of all services"""
281
+ self.print_header("Service Status Overview")
282
+
283
+ for service_name in self.service_configs.keys():
284
+ status = await self.get_service_status(service_name)
285
+
286
+ print(f"\nπŸ“Š {service_name.upper()} Service")
287
+ print(f" Port: {status['port']}")
288
+ print(f" Script: {status['script']}")
289
+
290
+ if status["managed_process"]:
291
+ print(f" βœ… Managed process running (PID: {status.get('pid', 'unknown')})")
292
+ if 'uptime' in status:
293
+ uptime_str = f"{status['uptime']:.0f} seconds"
294
+ print(f" ⏱️ Uptime: {uptime_str}")
295
+ if 'cpu_percent' in status:
296
+ print(f" πŸ’» CPU: {status['cpu_percent']:.1f}%")
297
+ if 'memory_mb' in status:
298
+ print(f" 🧠 Memory: {status['memory_mb']:.1f} MB")
299
+ elif status["port_in_use"]:
300
+ print(f" ⚠️ Port in use (external process)")
301
+ else:
302
+ print(f" ❌ Not running")
303
+
304
+ if status["health_check"]:
305
+ print(f" βœ… Health check: OK")
306
+ if status["health_data"]:
307
+ health = status["health_data"]
308
+ if isinstance(health, dict) and "status" in health:
309
+ print(f" Status: {health['status']}")
310
+ else:
311
+ print(f" ❌ Health check: Failed")
312
+ if status["health_data"] and "error" in status["health_data"]:
313
+ print(f" Error: {status['health_data']['error']}")
314
+
315
+ async def test_service(self, service_name: str):
316
+ """Test a specific service"""
317
+ if service_name not in self.service_configs:
318
+ print(f"❌ Unknown service: {service_name}")
319
+ return
320
+
321
+ self.print_header(f"Testing {service_name.upper()} Service")
322
+
323
+ status = await self.get_service_status(service_name)
324
+
325
+ # Basic status
326
+ if not status["port_in_use"]:
327
+ print("❌ Service is not running")
328
+ return
329
+
330
+ if not status["health_check"]:
331
+ print("❌ Health check failed")
332
+ if status["health_data"]:
333
+ print(f" Error: {status['health_data']}")
334
+ return
335
+
336
+ print("βœ… Service is running and healthy")
337
+
338
+ # Service-specific tests
339
+ port = status["port"]
340
+
341
+ if service_name == "ner":
342
+ await self.test_ner_service(port)
343
+ elif service_name == "ocr":
344
+ await self.test_ocr_service(port)
345
+ elif service_name == "rag":
346
+ await self.test_rag_service(port)
347
+ elif service_name == "unified":
348
+ await self.test_unified_service(port)
349
+
350
+ async def test_ner_service(self, port: int):
351
+ """Test NER service functionality"""
352
+ print("\nπŸ§ͺ Testing NER functionality...")
353
+
354
+ try:
355
+ test_data = {
356
+ "text": "John Smith works at Microsoft in Seattle.",
357
+ "extract_relationships": True,
358
+ "include_embeddings": False,
359
+ "generate_graph_files": False
360
+ }
361
+
362
+ async with httpx.AsyncClient() as client:
363
+ response = await client.post(
364
+ f"http://localhost:{port}/analyze/text",
365
+ json=test_data,
366
+ timeout=30.0
367
+ )
368
+
369
+ if response.status_code == 200:
370
+ result = response.json()
371
+ if result.get("success"):
372
+ entities = result.get("entities", [])
373
+ relationships = result.get("relationships", [])
374
+ print(f" βœ… NER analysis successful")
375
+ print(f" πŸ“Š Found {len(entities)} entities, {len(relationships)} relationships")
376
+ else:
377
+ print(f" ❌ NER analysis failed: {result.get('error', 'Unknown error')}")
378
+ else:
379
+ print(f" ❌ NER test failed: HTTP {response.status_code}")
380
+
381
+ except Exception as e:
382
+ print(f" ❌ NER test error: {e}")
383
+
384
+ async def test_ocr_service(self, port: int):
385
+ """Test OCR service functionality"""
386
+ print("\nπŸ§ͺ Testing OCR functionality...")
387
+
388
+ try:
389
+ async with httpx.AsyncClient() as client:
390
+ # Test health endpoint (OCR doesn't have complex test without files)
391
+ response = await client.get(f"http://localhost:{port}/health")
392
+
393
+ if response.status_code == 200:
394
+ print(" βœ… OCR service is responsive")
395
+ else:
396
+ print(f" ❌ OCR test failed: HTTP {response.status_code}")
397
+
398
+ except Exception as e:
399
+ print(f" ❌ OCR test error: {e}")
400
+
401
+ async def test_rag_service(self, port: int):
402
+ """Test RAG service functionality"""
403
+ print("\nπŸ§ͺ Testing RAG functionality...")
404
+
405
+ try:
406
+ async with httpx.AsyncClient() as client:
407
+ # Test document listing
408
+ response = await client.get(f"http://localhost:{port}/documents?limit=5")
409
+
410
+ if response.status_code == 200:
411
+ result = response.json()
412
+ documents = result.get("documents", [])
413
+ print(f" βœ… RAG service is responsive")
414
+ print(f" πŸ“Š Found {len(documents)} documents in database")
415
+ else:
416
+ print(f" ❌ RAG test failed: HTTP {response.status_code}")
417
+
418
+ except Exception as e:
419
+ print(f" ❌ RAG test error: {e}")
420
+
421
+ async def test_unified_service(self, port: int):
422
+ """Test unified service functionality"""
423
+ print("\nπŸ§ͺ Testing Unified functionality...")
424
+
425
+ try:
426
+ async with httpx.AsyncClient() as client:
427
+ # Test service discovery
428
+ response = await client.get(f"http://localhost:{port}/services")
429
+
430
+ if response.status_code == 200:
431
+ result = response.json()
432
+ services = result.get("services", {})
433
+ print(f" βœ… Unified service is responsive")
434
+ print(f" πŸ“Š Discovered {len(services)} services")
435
+ else:
436
+ print(f" ❌ Unified test failed: HTTP {response.status_code}")
437
+
438
+ except Exception as e:
439
+ print(f" ❌ Unified test error: {e}")
440
+
441
+ def list_services(self):
442
+ """List all available services"""
443
+ self.print_header("Available Services")
444
+
445
+ for service_name, service in self.service_configs.items():
446
+ print(f"\nπŸ“ {service_name}")
447
+ print(f" Description: {service['description']}")
448
+ print(f" Script: {service['script']}")
449
+ print(f" Port: {service['port']}")
450
+ print(f" URL: http://localhost:{service['port']}")
451
+
452
async def main():
    """Command line interface: start/stop/restart/status/test/list services.

    Improvement: the dependency-ordered startup sequence was duplicated
    between the "start all" and "restart all" branches; it is now shared
    via a local helper.
    """
    parser = argparse.ArgumentParser(
        description="Service Management Tool for Unified AI Services",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python manage_services.py start ner     # Start NER service
  python manage_services.py stop all      # Stop all services
  python manage_services.py status        # Show status of all services
  python manage_services.py test rag      # Test RAG service
  python manage_services.py list          # List available services
        """
    )

    parser.add_argument(
        "action",
        choices=["start", "stop", "restart", "status", "test", "list"],
        help="Action to perform"
    )

    parser.add_argument(
        "service",
        nargs="?",
        choices=["ner", "ocr", "rag", "unified", "all"],
        help="Service to act on (use 'all' for all services)"
    )

    args = parser.parse_args()

    manager = ServiceManager()

    # Actions that don't need a service argument.
    if args.action == "list":
        manager.list_services()
        return

    if args.action == "status":
        await manager.status_all_services()
        return

    if not args.service:
        print("❌ Service argument is required for this action")
        parser.print_help()
        return

    # Dependency order: backends first, unified app last.
    startup_order = ["ocr", "rag", "ner", "unified"]

    def _start_in_order():
        """Start all services in dependency order, staggering startups."""
        for service in startup_order:
            if manager.start_service(service):
                time.sleep(3)  # let each service settle before the next
            else:
                print(f"⚠️ Failed to start {service}, continuing with other services...")

    if args.action == "start":
        if args.service == "all":
            _start_in_order()
        else:
            manager.start_service(args.service)

    elif args.action == "stop":
        if args.service == "all":
            manager.stop_all_services()
        else:
            manager.stop_service(args.service)

    elif args.action == "restart":
        if args.service == "all":
            print("🔄 Restarting all services...")
            manager.stop_all_services()
            time.sleep(2)
            _start_in_order()
        else:
            print(f"🔄 Restarting {args.service} service...")
            manager.stop_service(args.service)
            time.sleep(2)
            manager.start_service(args.service)

    elif args.action == "test":
        if args.service == "all":
            for service_name in manager.service_configs:
                await manager.test_service(service_name)
                print()  # spacing between tests
        else:
            await manager.test_service(args.service)
542
+
543
if __name__ == "__main__":
    # Top-level guard: run the async CLI and translate failures to exit codes.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n🛑 Operation cancelled by user")
    except Exception as exc:
        print(f"\n❌ Error: {exc}")
        sys.exit(1)
requirements.txt ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Unified AI Services - Python Dependencies
2
+ # Core framework dependencies
3
+ fastapi>=0.104.1
4
+ uvicorn[standard]>=0.24.0
5
+ pydantic>=2.5.0
6
+ python-multipart>=0.0.6
7
+
8
+ # HTTP client and async support
9
+ httpx>=0.25.0
10
+ aiofiles>=23.2.1
11
+
12
+ # Database dependencies
13
+ asyncpg>=0.29.0
14
+ psycopg2-binary>=2.9.7
15
+
16
+ # Azure services
17
+ azure-ai-inference>=1.0.0
18
+ azure-core>=1.29.0
19
+ azure-storage-blob>=12.19.0
20
+ azure-ai-documentintelligence>=1.0.0
21
+
22
+ # OpenAI integration
23
+ openai>=1.3.0
24
+
25
+ # Document processing
26
+ python-docx>=1.1.0
27
+ beautifulsoup4>=4.12.0
28
+ lxml>=4.9.0
29
+ Pillow>=10.0.0
30
+
31
+ # Utilities
32
+ requests>=2.31.0
33
+ numpy>=1.24.0
34
+ python-dotenv>=1.0.0
35
+ psutil>=5.9.0
36
+
37
+ # Development and testing (optional)
38
+ pytest>=7.4.0
39
+ pytest-asyncio>=0.21.0
40
+ black>=23.0.0
41
+ flake8>=6.0.0
42
+
43
+ # Additional data processing
44
+ pandas>=2.0.0
45
+ scikit-learn>=1.3.0
setup.py ADDED
@@ -0,0 +1,511 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Automated Setup and Configuration for Unified AI Services
4
+ Helps set up the environment, validate configurations, and initialize services
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import json
10
+ import asyncio
11
+ import subprocess
12
+ from pathlib import Path
13
+ from typing import Dict, List, Optional
14
+ import shutil
15
+
16
def print_header(title: str):
    """Print a formatted header"""
    bar = "=" * 60
    print(f"\n{bar}\n {title}\n{bar}")
21
+
22
def print_step(step: str):
    """Print a step indicator"""
    print("\n🔧 " + step)
25
+
26
def check_python_version():
    """Check Python version compatibility.

    Returns True when the interpreter is Python 3.8 or newer, otherwise
    prints an error and returns False.
    """
    print_step("Checking Python version...")

    version = sys.version_info
    # Idiom fix: compare version_info against a tuple instead of spelling
    # out the major/minor arithmetic by hand.
    if version < (3, 8):
        print("❌ Python 3.8 or higher is required")
        print(f"   Current version: {version.major}.{version.minor}.{version.micro}")
        return False

    print(f"✅ Python {version.major}.{version.minor}.{version.micro} is compatible")
    return True
38
+
39
def create_directory_structure():
    """Create necessary directory structure"""
    print_step("Creating directory structure...")

    for directory in ("services", "exports", "logs", "temp", "tests", "data"):
        path = Path(directory)
        if path.exists():
            print(f"   ✓ Directory exists: {directory}")
        else:
            path.mkdir(parents=True, exist_ok=True)
            print(f"   ✅ Created directory: {directory}")
59
+
60
def check_service_files():
    """Check if service files exist"""
    print_step("Checking service files...")

    required_files = {
        "services/ner_service.py": "NER Service",
        "services/ocr_service.py": "OCR Service",
        "services/rag_service.py": "RAG Service",
        "app.py": "Unified Application",
        "configs.py": "Configuration Management",
    }

    missing_files = [p for p in required_files if not Path(p).exists()]

    for file_path, description in required_files.items():
        if file_path in missing_files:
            print(f"   ❌ {description}: {file_path} (MISSING)")
        else:
            print(f"   ✅ {description}: {file_path}")

    if missing_files:
        print(f"\n⚠️ Missing files detected:")
        for file_path in missing_files:
            print(f"   - {file_path}")
        print("\nPlease ensure all service files are in the correct locations.")
        return False

    return True
89
+
90
def create_env_file():
    """Create or update the .env file from interactive user input.

    Returns True on success or when the user keeps an existing file.

    Fix: the RAG section previously wrote shell-style placeholders such as
    ``PG_HOST=${POSTGRES_HOST}``; python-dotenv does not expand those by
    default, so the concrete values collected from the user are written
    instead.
    """
    print_step("Setting up environment configuration...")

    env_path = Path(".env")

    if env_path.exists():
        response = input("   .env file already exists. Overwrite? (y/N): ")
        if response.lower() != 'y':
            print("   Keeping existing .env file")
            return True

    print("\n📝 Please provide the following configuration values:")
    print("   (Press Enter to use default values shown in brackets)")

    config_values = {}

    print("\n🌐 Server Configuration:")
    config_values['HOST'] = input("   Host [0.0.0.0]: ") or "0.0.0.0"
    config_values['DEBUG'] = input("   Debug mode (true/false) [True]: ") or "True"
    config_values['MAIN_PORT'] = input("   Main app port [8000]: ") or "8000"
    config_values['NER_PORT'] = input("   NER service port [8500]: ") or "8500"
    config_values['OCR_PORT'] = input("   OCR service port [8400]: ") or "8400"
    config_values['RAG_PORT'] = input("   RAG service port [8401]: ") or "8401"

    print("\n🗄️ PostgreSQL Configuration:")
    config_values['POSTGRES_HOST'] = input("   PostgreSQL host: ")
    config_values['POSTGRES_PORT'] = input("   PostgreSQL port [5432]: ") or "5432"
    config_values['POSTGRES_USER'] = input("   PostgreSQL user: ")
    config_values['POSTGRES_PASSWORD'] = input("   PostgreSQL password: ")
    config_values['POSTGRES_DATABASE'] = input("   PostgreSQL database [postgres]: ") or "postgres"

    print("\n🤖 Azure OpenAI Configuration:")
    config_values['AZURE_OPENAI_ENDPOINT'] = input("   Azure OpenAI endpoint: ")
    config_values['AZURE_OPENAI_API_KEY'] = input("   Azure OpenAI API key: ")
    config_values['EMBEDDING_MODEL'] = input("   Embedding model [text-embedding-3-large]: ") or "text-embedding-3-large"

    print("\n🧠 DeepSeek Configuration:")
    config_values['DEEPSEEK_ENDPOINT'] = input("   DeepSeek endpoint: ")
    config_values['DEEPSEEK_API_KEY'] = input("   DeepSeek API key: ")
    config_values['DEEPSEEK_MODEL'] = input("   DeepSeek model [DeepSeek-R1-0528]: ") or "DeepSeek-R1-0528"

    print("\n📄 Azure Document Intelligence Configuration:")
    config_values['AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT'] = input("   Document Intelligence endpoint: ")
    config_values['AZURE_DOCUMENT_INTELLIGENCE_KEY'] = input("   Document Intelligence API key: ")

    print("\n💾 Azure Storage Configuration:")
    config_values['AZURE_STORAGE_ACCOUNT_URL'] = input("   Storage account URL: ")
    config_values['AZURE_BLOB_SAS_TOKEN'] = input("   Blob SAS token: ")
    config_values['BLOB_CONTAINER'] = input("   Blob container [historylog]: ") or "historylog"

    try:
        cv = config_values
        lines = [
            "# =================================================================",
            "# Unified AI Services - Environment Configuration",
            "# Generated by setup.py",
            "# =================================================================",
            "",
            "# Server Configuration",
            f"HOST={cv['HOST']}",
            f"DEBUG={cv['DEBUG']}",
            f"MAIN_PORT={cv['MAIN_PORT']}",
            f"NER_PORT={cv['NER_PORT']}",
            f"OCR_PORT={cv['OCR_PORT']}",
            f"RAG_PORT={cv['RAG_PORT']}",
            "",
            "# PostgreSQL Configuration",
            f"POSTGRES_HOST={cv['POSTGRES_HOST']}",
            f"POSTGRES_PORT={cv['POSTGRES_PORT']}",
            f"POSTGRES_USER={cv['POSTGRES_USER']}",
            f"POSTGRES_PASSWORD={cv['POSTGRES_PASSWORD']}",
            f"POSTGRES_DATABASE={cv['POSTGRES_DATABASE']}",
            "",
            "# Azure OpenAI Configuration",
            f"AZURE_OPENAI_ENDPOINT={cv['AZURE_OPENAI_ENDPOINT']}",
            f"AZURE_OPENAI_API_KEY={cv['AZURE_OPENAI_API_KEY']}",
            f"EMBEDDING_MODEL={cv['EMBEDDING_MODEL']}",
            f"AZURE_OPENAI_DEPLOYMENT_NAME={cv['EMBEDDING_MODEL']}",
            "",
            "# DeepSeek Configuration",
            f"DEEPSEEK_ENDPOINT={cv['DEEPSEEK_ENDPOINT']}",
            f"DEEPSEEK_API_KEY={cv['DEEPSEEK_API_KEY']}",
            f"DEEPSEEK_MODEL={cv['DEEPSEEK_MODEL']}",
            "",
            "# Azure Document Intelligence Configuration",
            f"AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT={cv['AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT']}",
            f"AZURE_DOCUMENT_INTELLIGENCE_KEY={cv['AZURE_DOCUMENT_INTELLIGENCE_KEY']}",
            "",
            "# Azure Storage Configuration",
            f"AZURE_STORAGE_ACCOUNT_URL={cv['AZURE_STORAGE_ACCOUNT_URL']}",
            f"AZURE_BLOB_SAS_TOKEN=\"{cv['AZURE_BLOB_SAS_TOKEN']}\"",
            f"BLOB_CONTAINER={cv['BLOB_CONTAINER']}",
            "",
            "# Processing Configuration",
            "MAX_FILE_SIZE=50",
            "REQUEST_TIMEOUT=300",
            "LOG_LEVEL=INFO",
            "ALLOWED_ORIGINS=*",
            "",
            "# RAG Specific Configuration",
            # Write concrete values (dotenv does not expand ${VAR} syntax).
            f"PG_HOST={cv['POSTGRES_HOST']}",
            f"PG_PORT={cv['POSTGRES_PORT']}",
            "PG_DATABASE=vectorsearch",
            f"PG_USER={cv['POSTGRES_USER']}",
            f"PG_PASSWORD={cv['POSTGRES_PASSWORD']}",
            "PG_SSL_MODE=require",
            "CHUNK_SIZE=1000",
            "CHUNK_OVERLAP=200",
            "MIN_CHUNK_SIZE=50",
        ]
        env_path.write_text("\n".join(lines) + "\n", encoding="utf-8")

        print("   ✅ .env file created successfully")
        return True

    except Exception as e:
        print(f"   ❌ Failed to create .env file: {e}")
        return False
214
+
215
def install_dependencies():
    """Install Python dependencies"""
    print_step("Installing Python dependencies...")

    # Write a default requirements.txt when none is present.
    requirements_path = Path("requirements.txt")
    if not requirements_path.exists():
        print("   Creating requirements.txt file...")

        requirements = [
            "fastapi>=0.104.1",
            "uvicorn[standard]>=0.24.0",
            "httpx>=0.25.0",
            "asyncpg>=0.29.0",
            "psutil>=5.9.0",
            "pydantic>=2.5.0",
            "python-dotenv>=1.0.0",
            "python-multipart>=0.0.6",
            "azure-ai-inference>=1.0.0",
            "azure-core>=1.29.0",
            "azure-storage-blob>=12.19.0",
            "azure-ai-documentintelligence>=1.0.0",
            "openai>=1.3.0",
            "beautifulsoup4>=4.12.0",
            "requests>=2.31.0",
            "numpy>=1.24.0",
            "Pillow>=10.0.0",
            "python-docx>=1.1.0",
            "lxml>=4.9.0",
            "aiofiles>=23.2.1",
        ]

        try:
            requirements_path.write_text("".join(f"{req}\n" for req in requirements))
            print("   ✅ requirements.txt created")
        except Exception as e:
            print(f"   ❌ Failed to create requirements.txt: {e}")
            return False

    try:
        print("   Installing dependencies (this may take a few minutes)...")
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
            capture_output=True,
            text=True,
        )
    except Exception as e:
        print(f"   ❌ Error installing dependencies: {e}")
        return False

    if result.returncode == 0:
        print("   ✅ Dependencies installed successfully")
        return True

    print(f"   ❌ Failed to install dependencies:")
    print(f"   {result.stderr}")
    return False
274
+
275
def validate_configuration():
    """Validate the configuration"""
    print_step("Validating configuration...")

    try:
        from configs import get_config, validate_environment

        config = get_config()
        config.print_configuration_summary()

        if validate_environment():
            print("\n   ✅ Configuration validation passed!")
            return True

        print("\n   ❌ Configuration validation failed!")
        print("   Please check your .env file and update missing values.")
        return False

    except ImportError as e:
        print(f"   ❌ Failed to import configuration module: {e}")
        return False
    except Exception as e:
        print(f"   ❌ Configuration validation error: {e}")
        return False
299
+
300
async def test_database_connection():
    """Test database connection.

    Opens a short-lived asyncpg connection using the NER service settings,
    runs a trivial `SELECT version()` query, and reports the outcome.

    Returns:
        bool: True when the connection and query succeed, False otherwise.
    """
    print_step("Testing database connection...")

    try:
        from configs import get_config
        import asyncpg

        config = get_config()

        # Test connection (10s timeout so an unreachable host fails fast).
        conn = await asyncpg.connect(
            host=config.ner.POSTGRES_HOST,
            port=config.ner.POSTGRES_PORT,
            database=config.ner.POSTGRES_DATABASE,
            user=config.ner.POSTGRES_USER,
            password=config.ner.POSTGRES_PASSWORD,
            ssl='require',
            timeout=10
        )

        # FIX: close the connection even when the test query raises, so a
        # failed query does not leak an open server connection (the original
        # skipped `conn.close()` on that path).
        try:
            version = await conn.fetchval("SELECT version()")
        finally:
            await conn.close()

        print(" βœ… Database connection successful")
        print(f" Database version: {version[:50]}...")
        return True

    except Exception as e:
        print(f" ❌ Database connection failed: {e}")
        print("\n πŸ’‘ Troubleshooting tips:")
        print(" 1. Check your PostgreSQL server is running")
        print(" 2. Verify host, port, username, and password")
        print(" 3. Ensure your IP is allowlisted in firewall rules")
        print(" 4. Check SSL configuration")
        return False
337
+
338
def create_startup_script():
    """Create startup script for easy service management.

    Writes start_services.bat on Windows or an executable start_services.sh
    elsewhere; both launch `python app.py` after a 3-second countdown.
    Always returns True.
    """
    print_step("Creating startup script...")

    # NOTE(review): the script literals below are reconstructed flush-left;
    # the original indentation inside the triple-quoted strings could not be
    # recovered exactly from the diff view — confirm against the repo file.
    # Create startup script for Windows
    if sys.platform == "win32":
        script_content = """@echo off
echo Starting Unified AI Services...
echo.

echo Starting in 3 seconds...
timeout /t 3 /nobreak >nul

echo Starting unified application...
python app.py

pause
"""
        with open("start_services.bat", "w") as f:
            f.write(script_content)
        print(" βœ… Created start_services.bat")

    # Create startup script for Unix/Linux/Mac
    else:
        script_content = """#!/bin/bash

echo "Starting Unified AI Services..."
echo

echo "Starting in 3 seconds..."
sleep 3

echo "Starting unified application..."
python app.py
"""
        with open("start_services.sh", "w") as f:
            f.write(script_content)

        # Make executable (rwxr-xr-x) so ./start_services.sh runs directly.
        os.chmod("start_services.sh", 0o755)
        print(" βœ… Created start_services.sh")

    return True
381
+
382
def create_test_script():
    """Create test script for easy testing.

    Writes run_tests.bat on Windows or an executable run_tests.sh elsewhere;
    both prompt the user, then run `python test_unified.py`. Always returns
    True.
    """
    print_step("Creating test script...")

    # NOTE(review): script literals reconstructed flush-left; original
    # in-string indentation not recoverable from the diff view.
    # Create test script for Windows
    if sys.platform == "win32":
        script_content = """@echo off
echo Running Unified System Tests...
echo.

echo Make sure the unified application is running first!
echo Press any key to continue or Ctrl+C to cancel...
pause >nul

echo Running comprehensive tests...
python test_unified.py

pause
"""
        with open("run_tests.bat", "w") as f:
            f.write(script_content)
        print(" βœ… Created run_tests.bat")

    # Create test script for Unix/Linux/Mac
    else:
        script_content = """#!/bin/bash

echo "Running Unified System Tests..."
echo

echo "Make sure the unified application is running first!"
read -p "Press Enter to continue or Ctrl+C to cancel..."

echo "Running comprehensive tests..."
python test_unified.py
"""
        with open("run_tests.sh", "w") as f:
            f.write(script_content)

        # Make executable (rwxr-xr-x) so ./run_tests.sh runs directly.
        os.chmod("run_tests.sh", 0o755)
        print(" βœ… Created run_tests.sh")

    return True
426
+
427
def main():
    """Run the interactive setup workflow for the unified AI services.

    Walks through environment checks, configuration, dependency install,
    validation, and helper-script generation, then prints a summary of any
    steps that failed. Each step function returns a bool; a False return or
    an exception marks the step as failed without aborting the run.
    """
    print_header("Unified AI Services - Automated Setup")

    print("This script will help you set up the Unified AI Services application.")
    print("It will:")
    print(" β€’ Check your Python environment")
    print(" β€’ Create necessary directories")
    print(" β€’ Check for required service files")
    print(" β€’ Set up configuration (.env file)")
    print(" β€’ Install Python dependencies")
    print(" β€’ Validate configuration")
    print(" β€’ Test database connection")
    print(" β€’ Create startup and test scripts")

    response = input("\nProceed with setup? (Y/n): ")
    # FIX: accept 'no' and surrounding whitespace, not just a bare 'n'
    # (previously " n" or "no" silently proceeded with setup).
    if response.strip().lower() in ("n", "no"):
        print("Setup cancelled.")
        return

    setup_steps = [
        ("Python Version Check", check_python_version),
        ("Directory Structure", create_directory_structure),
        ("Service Files Check", check_service_files),
        ("Environment Configuration", create_env_file),
        ("Dependencies Installation", install_dependencies),
        ("Configuration Validation", validate_configuration),
        ("Startup Scripts", create_startup_script),
        ("Test Scripts", create_test_script),
    ]

    failed_steps = []

    for step_name, step_func in setup_steps:
        try:
            if not step_func():
                failed_steps.append(step_name)
        except Exception as e:
            print(f" ❌ {step_name} failed with exception: {e}")
            failed_steps.append(step_name)

    # Database connection test (optional) — a failure here is informational
    # only and does not count against the setup summary.
    print_step("Testing database connection (optional)...")
    try:
        asyncio.run(test_database_connection())
    except Exception as e:
        print(f" ⚠️ Database test skipped: {e}")

    # Final summary
    print_header("Setup Summary")

    if not failed_steps:
        print("πŸŽ‰ Setup completed successfully!")
        print("\nNext steps:")
        print("1. Review the .env file and update any missing values")
        print("2. Start the unified application:")
        if sys.platform == "win32":
            print(" β€’ Double-click start_services.bat")
            print(" β€’ Or run: python app.py")
        else:
            print(" β€’ Run: ./start_services.sh")
            print(" β€’ Or run: python app.py")
        print("3. Test the system:")
        if sys.platform == "win32":
            print(" β€’ Double-click run_tests.bat")
            print(" β€’ Or run: python test_unified.py")
        else:
            print(" β€’ Run: ./run_tests.sh")
            print(" β€’ Or run: python test_unified.py")
        print("4. Access the API documentation at: http://localhost:8000/docs")

    else:
        print("⚠️ Setup completed with some issues:")
        for step in failed_steps:
            print(f" ❌ {step}")

        print("\nPlease resolve the failed steps before proceeding.")
        print("You may need to:")
        print("β€’ Check your internet connection for dependency installation")
        print("β€’ Verify your Azure service credentials")
        print("β€’ Ensure PostgreSQL is accessible")
        print("β€’ Check file permissions")

if __name__ == "__main__":
    main()
test.py ADDED
@@ -0,0 +1,1055 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Comprehensive Test Suite for Unified AI Services
4
+ Tests the unified application and all integrated services (NER, OCR, RAG)
5
+ Combines functionality from test_rag.py and test_ner.py with new unified tests
6
+ """
7
+
8
+ import asyncio
9
+ import httpx
10
+ import json
11
+ import io
12
+ import sys
13
+ import time
14
+ import tempfile
15
+ import os
16
+ from pathlib import Path
17
+ from typing import Dict, List, Any, Optional, Tuple
18
+ import uuid as python_uuid
19
+
20
# Import configuration — optional: the suite still runs with the defaults
# below when the project `configs` module is unavailable.
try:
    from configs import get_config, validate_environment
    config = get_config()
except ImportError:
    print("⚠️ Could not import configs. Using default values.")
    config = None

# Test configuration — default local ports for each service under test.
UNIFIED_URL = "http://localhost:8000" # Main unified app
NER_URL = "http://localhost:8500" # Direct NER service
OCR_URL = "http://localhost:8400" # Direct OCR service
RAG_URL = "http://localhost:8401" # Direct RAG service
TEST_TIMEOUT = 300  # per-request httpx timeout (seconds) — generous for slow model calls
34
+
35
+ # Test data (from original test files)
36
+ THAI_CYANIDE_MURDER_CASE = """
37
+ ΰΉ€ΰΈ«ΰΈ•ΰΈΈΰΈ†ΰΈ²ΰΈ•ΰΈΰΈ£ΰΈ£ΰΈ‘ΰΈ”ΰΉ‰ΰΈ§ΰΈ’ΰΉ„ΰΈ‹ΰΈ’ΰΈ²ΰΉ„ΰΈ™ΰΈ”ΰΉŒ พ.ΰΈ¨. 2566
38
+
39
+ ΰΈ„ΰΈ”ΰΈ΅ΰΈ†ΰΈ²ΰΈ•ΰΈΰΈ£ΰΈ£ΰΈ‘ΰΈ•ΰΉˆΰΈ­ΰΉ€ΰΈ™ΰΈ·ΰΉˆΰΈ­ΰΈ‡ΰΈ—ΰΈ΅ΰΉˆΰΈͺΰΈ±ΰΉˆΰΈ™ΰΈͺΰΈ°ΰΉ€ΰΈ—ΰΈ·ΰΈ­ΰΈ™ΰΈͺΰΈ±ΰΈ‡ΰΈ„ΰΈ‘ΰΉ„ΰΈ—ΰΈ’ ΰΉ€ΰΈΰΈ΄ΰΈ”ΰΈ‚ΰΈΆΰΉ‰ΰΈ™ΰΈ£ΰΈ°ΰΈ«ΰΈ§ΰΉˆΰΈ²ΰΈ‡ΰΉ€ΰΈ”ΰΈ·ΰΈ­ΰΈ™ΰΉ€ΰΈ‘ΰΈ©ΰΈ²ΰΈ’ΰΈ™-ΰΈ•ΰΈΈΰΈ₯ΰΈ²ΰΈ„ΰΈ‘ พ.ΰΈ¨. 2566
40
+ ΰΉ‚ΰΈ”ΰΈ’ΰΈ‘ΰΈ΅ΰΈ™ΰΈ²ΰΈ‡ΰΈͺΰΈ²ΰΈ§ΰΈͺΰΈ²ΰΈ£ΰΈ΄ΰΈ“ΰΈ΅ ΰΈŠΰΈ±ΰΈ’ΰΈ§ΰΈ±ΰΈ’ΰΈ™ΰΉŒ ΰΈ«ΰΈ£ΰΈ·ΰΈ­ "แอฑ ΰΉ„ΰΈ‹ΰΈ’ΰΈ²ΰΉ„ΰΈ™ΰΈ”ΰΉŒ" ΰΈ­ΰΈ²ΰΈ’ΰΈΈ 36 ΰΈ›ΰΈ΅ ΰΉ€ΰΈ›ΰΉ‡ΰΈ™ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈ«ΰΈ²
41
+
42
+ ΰΈ£ΰΈ²ΰΈ’ΰΈ₯ΰΈ°ΰΉ€ΰΈ­ΰΈ΅ΰΈ’ΰΈ”ΰΈ„ΰΈ”ΰΈ΅:
43
+ ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈ«ΰΈ²ΰΉ„ΰΈ”ΰΉ‰ΰΈ—ΰΈ³ΰΈΰΈ²ΰΈ£ΰΈ§ΰΈ²ΰΈ‡ΰΈ’ΰΈ²ΰΈžΰΈ΄ΰΈ©ΰΉ„ΰΈ‹ΰΈ’ΰΈ²ΰΉ„ΰΈ™ΰΈ”ΰΉŒ (Potassium Cyanide) ในอาหารแΰΈ₯ΰΈ°ΰΉ€ΰΈ„ΰΈ£ΰΈ·ΰΉˆΰΈ­ΰΈ‡ΰΈ”ΰΈ·ΰΉˆΰΈ‘ΰΈ‚ΰΈ­ΰΈ‡ΰΉ€ΰΈ«ΰΈ’ΰΈ·ΰΉˆΰΈ­ΰΈ«ΰΈ₯ΰΈ²ΰΈ’ΰΈ£ΰΈ²ΰΈ’
44
+ ΰΉ€ΰΈ«ΰΈ’ΰΈ·ΰΉˆΰΈ­ΰΈ£ΰΈ²ΰΈ’ΰΉΰΈ£ΰΈΰΈ„ΰΈ·ΰΈ­ ΰΈ™ΰΈ²ΰΈ‡ΰΈͺิริพร บุญΰΈ₯ΰΈ²ΰΈ ΰΈ§ΰΈ™ΰΈ΄ΰΈŠ ΰΈ­ΰΈ²ΰΈ’ΰΈΈ 32 ΰΈ›ΰΈ΅ ΰΉ€ΰΈͺΰΈ΅ΰΈ’ΰΈŠΰΈ΅ΰΈ§ΰΈ΄ΰΈ•ΰΉ€ΰΈ‘ΰΈ·ΰΉˆΰΈ­ΰΈ§ΰΈ±ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ 14 ΰΉ€ΰΈ‘ΰΈ©ΰΈ²ΰΈ’ΰΈ™ 2566 ΰΈ—ΰΈ΅ΰΉˆΰΈˆΰΈ±ΰΈ‡ΰΈ«ΰΈ§ΰΈ±ΰΈ”ΰΈΰΈ²ΰΈΰΈˆΰΈ™ΰΈšΰΈΈΰΈ£ΰΈ΅
45
+ ΰΉ€ΰΈ«ΰΈ’ΰΈ·ΰΉˆΰΈ­ΰΈ£ΰΈ²ΰΈ’ΰΈ—ΰΈ΅ΰΉˆΰΈͺΰΈ­ΰΈ‡ ΰΈ™ΰΈ²ΰΈ’ΰΈͺุรชัฒ ΰΈ­ΰΈ’ΰΈΉΰΉˆΰΈ„ΰΈ‡ΰΈ„ΰΈ₯ΰΈ±ΰΈ‡ ΰΈ­ΰΈ²ΰΈ’ΰΈΈ 45 ΰΈ›ΰΈ΅ ΰΉ€ΰΈͺΰΈ΅ΰΈ’ΰΈŠΰΈ΅ΰΈ§ΰΈ΄ΰΈ•ΰΉ€ΰΈ‘ΰΈ·ΰΉˆΰΈ­ΰΈ§ΰΈ±ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ 2 ΰΈžΰΈ€ΰΈ©ΰΈ ΰΈ²ΰΈ„ΰΈ‘ 2566 ΰΈ—ΰΈ΅ΰΉˆΰΈˆΰΈ±ΰΈ‡ΰΈ«ΰΈ§ΰΈ±ΰΈ”ΰΈ£ΰΈ²ΰΈŠΰΈšΰΈΈΰΈ£ΰΈ΅
46
+
47
+ การΰΈͺืบΰΈͺΰΈ§ΰΈ™:
48
+ ΰΈ•ΰΈ³ΰΈ£ΰΈ§ΰΈˆΰΈ ΰΈΉΰΈ˜ΰΈ£ΰΈ ΰΈ²ΰΈ„ 7 ร่วฑกับ ΰΈͺΰΈ³ΰΈ™ΰΈ±ΰΈΰΈ‡ΰΈ²ΰΈ™ΰΈ•ΰΈ³ΰΈ£ΰΈ§ΰΈˆΰΉΰΈ«ΰΉˆΰΈ‡ΰΈŠΰΈ²ΰΈ•ΰΈ΄ ทำการΰΈͺืบΰΈͺΰΈ§ΰΈ™
49
+ พบหΰΈ₯ΰΈ±ΰΈΰΈΰΈ²ΰΈ™ΰΈˆΰΈ²ΰΈΰΈΰΈ₯ΰΉ‰ΰΈ­ΰΈ‡ΰΈ§ΰΈ‡ΰΈˆΰΈ£ΰΈ›ΰΈ΄ΰΈ” (CCTV) ΰΉƒΰΈ™ΰΈ«ΰΈ₯ΰΈ²ΰΈ’ΰΈžΰΈ·ΰΉ‰ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ
50
+ ΰΈ•ΰΈ£ΰΈ§ΰΈˆΰΈžΰΈšΰΈͺΰΈ²ΰΈ£ΰΉ„ΰΈ‹ΰΈ’ΰΈ²ΰΉ„ΰΈ™ΰΈ”ΰΉŒΰΉƒΰΈ™ΰΈ£ΰΉˆΰΈ²ΰΈ‡ΰΈΰΈ²ΰΈ’ΰΉ€ΰΈ«ΰΈ’ΰΈ·ΰΉˆΰΈ­ΰΈ—ΰΈΈΰΈΰΈ£ΰΈ²ΰΈ’
51
+
52
+ การจับกุฑ:
53
+ ΰΈ§ΰΈ±ΰΈ™ΰΈ—ΰΈ΅ΰΉˆ 3 ΰΈ•ΰΈΈΰΈ₯ΰΈ²ΰΈ„ΰΈ‘ 2566 ΰΈ•ΰΈ³ΰΈ£ΰΈ§ΰΈˆΰΈˆΰΈ±ΰΈšΰΈΰΈΈΰΈ‘ΰΈ•ΰΈ±ΰΈ§ΰΈœΰΈΉΰΉ‰ΰΈ•ΰΉ‰ΰΈ­ΰΈ‡ΰΈ«ΰΈ²ΰΉ„ΰΈ”ΰΉ‰ΰΈ—ΰΈ΅ΰΉˆΰΉ‚ΰΈ£ΰΈ‡ΰΉΰΈ£ΰΈ‘ΰΉ€ΰΈ”ΰΈ­ΰΈ° ΰΈšΰΈ²ΰΈ’ΰΉΰΈ‹ΰΈ” ΰΈ•ΰΈ±ΰΉ‰ΰΈ‡ΰΈ­ΰΈ’ΰΈΉΰΉˆΰΈ—ΰΈ΅ΰΉˆ ถนนราฑคำแหง ΰΈΰΈ£ΰΈΈΰΈ‡ΰΉ€ΰΈ—ΰΈžΰΈ‘ΰΈ«ΰΈ²ΰΈ™ΰΈ„ΰΈ£
54
+ ΰΈžΰΈšΰΉ€ΰΈ­ΰΈΰΈͺΰΈ²ΰΈ£ΰΈ›ΰΈ₯ΰΈ­ΰΈ‘ ΰΈšΰΈ±ΰΈ•ΰΈ£ΰΈ›ΰΈ£ΰΈ°ΰΈŠΰΈ²ΰΈŠΰΈ™ΰΈ›ΰΈ₯ΰΈ­ΰΈ‘ แΰΈ₯ΰΈ°ΰΈ§ΰΈ±ΰΈ•ΰΈ–ΰΈΈΰΈžΰΈ’ΰΈ²ΰΈ™ΰΈͺΰΈ³ΰΈ„ΰΈ±ΰΈΰΈ­ΰΈ·ΰΉˆΰΈ™ΰΉ†
55
+ ΰΈ’ΰΈΆΰΈ”ΰΈ—ΰΈ£ΰΈ±ΰΈžΰΈ’ΰΉŒΰΈͺΰΈ΄ΰΈ™ΰΈ—ΰΈ΅ΰΉˆΰΉ„ΰΈ”ΰΉ‰ΰΈˆΰΈ²ΰΈΰΈΰΈ²ΰΈ£ΰΈΰΈ£ΰΈ°ΰΈ—ΰΈ³ΰΈœΰΈ΄ΰΈ” ΰΈ‘ΰΈΉΰΈ₯ΰΈ„ΰΉˆΰΈ²ΰΈ£ΰΈ§ΰΈ‘ΰΈΰΈ§ΰΉˆΰΈ² 2 ΰΈ₯ΰΉ‰ΰΈ²ΰΈ™ΰΈšΰΈ²ΰΈ—
56
+ """
57
+
58
+ ENGLISH_CYBERSECURITY_CASE = """
59
+ Major Cybersecurity Incident Report - Operation Digital Shield
60
+
61
+ Incident Overview:
62
+ On October 15, 2024, CyberDefense Corp, a leading cybersecurity firm headquartered in Austin, Texas, detected a sophisticated Advanced Persistent Threat (APT) targeting critical infrastructure across Southeast Asia.
63
+
64
+ Key Personnel:
65
+ - Dr. Sarah Chen, Chief Security Officer at CyberDefense Corp
66
+ - Agent Michael Rodriguez, FBI Cyber Division
67
+ - Captain Lisa Thompson, US Cyber Command
68
+
69
+ Technical Details:
70
+ The attackers used a custom malware strain called "DeepStrike" developed by the Shadow Dragon group
71
+ Primary attack vector: spear-phishing emails containing weaponized PDF documents
72
+ Estimated financial damage: $50 million USD across affected organizations
73
+ """
74
+
75
# Public echo endpoints used for URL-ingestion tests (stable, small payloads).
TEST_URLS = [
    "https://httpbin.org/html",
    "https://httpbin.org/json"
]
79
+
80
class TestResult:
    """Accumulates pass/fail/warning outcomes for a test run.

    Each recorded result is echoed to stdout immediately and retained for
    the final summary printed by `print_summary`.
    """

    def __init__(self):
        self.total_tests = 0
        self.passed_tests = 0
        self.failed_tests = 0
        self.test_results = []
        self.warnings = []

    def add_result(self, test_name: str, passed: bool, message: str = "", details: Dict = None):
        """Record one test outcome, update counters, and echo it."""
        self.total_tests += 1
        if passed:
            self.passed_tests += 1
            print(f"βœ… {test_name}")
            if message:
                print(f" {message}")
        else:
            self.failed_tests += 1
            print(f"❌ {test_name}: {message}")

        entry = {
            'test_name': test_name,
            'passed': passed,
            'message': message,
            'details': details or {},
        }
        self.test_results.append(entry)

    def add_warning(self, test_name: str, message: str):
        """Record a warning (doesn't count toward pass/fail totals)."""
        print(f"⚠️ {test_name}: {message}")
        self.warnings.append({
            'test_name': test_name,
            'message': message,
        })

    def print_summary(self):
        """Print the aggregate summary plus failed tests and warnings."""
        banner = "=" * 60
        print("\n" + banner)
        print("UNIFIED SYSTEM TEST SUMMARY")
        print(banner)
        print(f"Total Tests: {self.total_tests}")
        print(f"Passed: {self.passed_tests}")
        print(f"Failed: {self.failed_tests}")
        print(f"Warnings: {len(self.warnings)}")
        # Guard against division by zero when nothing was run.
        if self.total_tests > 0:
            print(f"Success Rate: {(self.passed_tests/self.total_tests*100):.1f}%")
        else:
            print("0%")

        if self.failed_tests > 0:
            print(f"\n❌ FAILED TESTS:")
            for failed in (r for r in self.test_results if not r['passed']):
                print(f" - {failed['test_name']}: {failed['message']}")

        if self.warnings:
            print(f"\n⚠️ WARNINGS:")
            for item in self.warnings:
                print(f" - {item['test_name']}: {item['message']}")
136
+ print(f" - {warning['test_name']}: {warning['message']}")
137
+
138
+ class UnifiedSystemTester:
139
+ """Main test class for unified system"""
140
+
141
+ def __init__(self):
142
+ self.result = TestResult()
143
+ self.session = None
144
+ self.created_documents = [] # Track for cleanup
145
+ self.created_analyses = [] # Track for cleanup
146
+
147
+ async def __aenter__(self):
148
+ self.session = httpx.AsyncClient(timeout=TEST_TIMEOUT)
149
+ return self
150
+
151
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
152
+ if self.session:
153
+ await self.session.aclose()
154
+
155
+ async def make_request(self, method: str, url: str, **kwargs) -> httpx.Response:
156
+ """Make HTTP request with error handling"""
157
+ try:
158
+ response = await self.session.request(method, url, **kwargs)
159
+ return response
160
+ except httpx.RequestError as e:
161
+ raise Exception(f"Request failed: {e}")
162
+
163
    async def test_unified_app_health(self):
        """Test 1: Unified Application Health Check.

        Calls GET /health on the unified app and records a pass when the
        reported status is "healthy" or "degraded" with at least one healthy
        sub-service. Returns True on pass, False otherwise.
        """
        print("πŸ” Test 1: Unified Application Health Check")
        try:
            response = await self.make_request('GET', f"{UNIFIED_URL}/health")

            if response.status_code == 200:
                data = response.json()
                status = data.get("status")
                services = data.get("services", [])

                healthy_services = [s for s in services if s.get("health")]
                total_services = len(services)

                # "degraded" still counts as a pass as long as something is up.
                if status in ["healthy", "degraded"] and healthy_services:
                    message = f"Status: {status}, Services: {len(healthy_services)}/{total_services} healthy"
                    for service in services:
                        service_status = "βœ…" if service.get("health") else "❌"
                        message += f"\n {service_status} {service.get('name')}: {service.get('status')} ({service.get('response_time', 0):.3f}s)"

                    self.result.add_result(
                        "Unified App Health Check",
                        True,
                        message,
                        data
                    )
                    return True
                else:
                    self.result.add_result(
                        "Unified App Health Check",
                        False,
                        f"System unhealthy: {data}"
                    )
                    return False
            else:
                self.result.add_result(
                    "Unified App Health Check",
                    False,
                    f"HTTP {response.status_code}: {response.text}"
                )
                return False
        except Exception as e:
            # Provide detailed diagnostics for connection failures
            if "connection" in str(e).lower():
                print(f"\nπŸ” Connection Diagnostics:")
                print(f" Unified App URL: {UNIFIED_URL}")
                print(f" Error: {e}")
                print(f"\nπŸ’‘ Possible Issues:")
                print(f" 1. Unified app is not running")
                print(f" 2. Wrong host/port in configuration")
                print(f" 3. Services failed to start")
                print(f"\nπŸš€ To Start Unified App:")
                print(f" python app.py")

            self.result.add_result(
                "Unified App Health Check",
                False,
                str(e)
            )
            return False
223
+
224
    async def test_individual_service_health(self):
        """Test 2: Individual Service Health Checks.

        Probes each backing service's /health endpoint directly (bypassing
        the unified app) and records one aggregate result. Returns True only
        when every service responds 200.
        """
        print("πŸ” Test 2: Individual Service Health Checks")

        services = [
            ("NER", NER_URL),
            ("OCR", OCR_URL),
            ("RAG", RAG_URL)
        ]

        all_healthy = True
        service_statuses = {}

        for service_name, service_url in services:
            try:
                response = await self.make_request('GET', f"{service_url}/health")

                if response.status_code == 200:
                    data = response.json()
                    status = data.get("status", "unknown")
                    service_statuses[service_name] = {
                        "healthy": True,
                        "status": status,
                        "details": data
                    }
                    print(f" βœ… {service_name}: {status}")
                else:
                    service_statuses[service_name] = {
                        "healthy": False,
                        "status": f"HTTP {response.status_code}",
                        "details": None
                    }
                    print(f" ❌ {service_name}: HTTP {response.status_code}")
                    all_healthy = False

            except Exception as e:
                # A connection error for one service does not abort the loop.
                service_statuses[service_name] = {
                    "healthy": False,
                    "status": f"Error: {e}",
                    "details": None
                }
                print(f" ❌ {service_name}: {e}")
                all_healthy = False

        self.result.add_result(
            "Individual Service Health",
            all_healthy,
            f"Services healthy: {sum(1 for s in service_statuses.values() if s['healthy'])}/{len(services)}",
            service_statuses
        )

        return all_healthy
276
+
277
    async def test_unified_analysis_text(self):
        """Test 3: Unified Analysis with Text.

        Sends the Thai fixture text through POST /analyze/unified with RAG
        indexing enabled, then verifies the expected service calls happened
        and entities were extracted. Returns the response payload on
        success, None on failure. Created analysis/document IDs are tracked
        for later cleanup.
        """
        print("πŸ” Test 3: Unified Analysis with Text")

        try:
            request_data = {
                "text": THAI_CYANIDE_MURDER_CASE,
                "extract_relationships": True,
                "include_embeddings": False,
                "include_summary": True,
                "generate_graph_files": True,
                "export_formats": ["neo4j", "json"],
                "enable_rag_indexing": True,
                "rag_title": "Cyanide Murder Case Analysis",
                "rag_keywords": ["cyanide", "murder", "investigation", "thai"],
                "rag_metadata": {"test": True, "case_type": "criminal"}
            }

            response = await self.make_request('POST', f"{UNIFIED_URL}/analyze/unified", json=request_data)

            if response.status_code == 200:
                data = response.json()
                if data.get("success"):
                    service_calls = data.get("service_calls", [])
                    ner_analysis = data.get("ner_analysis", {})
                    rag_document = data.get("rag_document", {})
                    processing_time = data.get("processing_time", 0)

                    # Validate NER analysis
                    entities = ner_analysis.get("entities", [])
                    relationships = ner_analysis.get("relationships", [])

                    # Track analysis for cleanup
                    if ner_analysis.get("analysis_id"):
                        self.created_analyses.append(ner_analysis["analysis_id"])
                    if rag_document and rag_document.get("document_id"):
                        self.created_documents.append(rag_document["document_id"])

                    message = f"Service calls: {', '.join(service_calls)}"
                    message += f"\n Processing time: {processing_time:.2f}s"
                    message += f"\n NER entities: {len(entities)}"
                    message += f"\n NER relationships: {len(relationships)}"
                    if rag_document:
                        message += f"\n RAG document ID: {rag_document.get('document_id', 'N/A')}"
                        message += f"\n RAG chunks: {rag_document.get('total_chunks', 0)}"

                    # Check if we got expected service calls — rag_upload is
                    # only expected because the request enabled RAG indexing.
                    expected_calls = ["ner_text"]
                    if "enable_rag_indexing" in request_data and request_data["enable_rag_indexing"]:
                        expected_calls.append("rag_upload")

                    all_expected_calls = all(call in service_calls for call in expected_calls)

                    self.result.add_result(
                        "Unified Analysis (Text)",
                        all_expected_calls and entities and len(service_calls) > 0,
                        message,
                        data
                    )
                    return data
                else:
                    self.result.add_result(
                        "Unified Analysis (Text)",
                        False,
                        data.get("error", "Analysis failed")
                    )
                    return None
            else:
                # Truncate body so a large HTML error page doesn't flood logs.
                self.result.add_result(
                    "Unified Analysis (Text)",
                    False,
                    f"HTTP {response.status_code}: {response.text[:200]}"
                )
                return None
        except Exception as e:
            self.result.add_result(
                "Unified Analysis (Text)",
                False,
                str(e)
            )
            return None
358
+
359
    async def test_unified_analysis_url(self):
        """Test 4: Unified Analysis with URL.

        Feeds a public httpbin URL through POST /analyze/unified and checks
        the "ner_url" service call was made. Returns the response payload on
        success, None on failure.
        """
        print("πŸ” Test 4: Unified Analysis with URL")

        try:
            request_data = {
                "url": "https://httpbin.org/html",
                "extract_relationships": True,
                "include_embeddings": False,
                "include_summary": True,
                "generate_graph_files": False,
                "export_formats": ["json"],
                "enable_rag_indexing": True,
                "rag_title": "Test URL Document",
                "rag_keywords": ["test", "url", "httpbin"],
                "rag_metadata": {"test": True, "source": "httpbin"}
            }

            response = await self.make_request('POST', f"{UNIFIED_URL}/analyze/unified", json=request_data)

            if response.status_code == 200:
                data = response.json()
                if data.get("success"):
                    service_calls = data.get("service_calls", [])
                    ner_analysis = data.get("ner_analysis", {})
                    rag_document = data.get("rag_document", {})

                    # Track for cleanup
                    if ner_analysis.get("analysis_id"):
                        self.created_analyses.append(ner_analysis["analysis_id"])
                    if rag_document and rag_document.get("document_id"):
                        self.created_documents.append(rag_document["document_id"])

                    message = f"Service calls: {', '.join(service_calls)}"
                    message += f"\n NER analysis ID: {ner_analysis.get('analysis_id', 'N/A')}"
                    if rag_document:
                        message += f"\n RAG document ID: {rag_document.get('document_id', 'N/A')}"

                    # Check for expected service calls
                    has_ner_url = "ner_url" in service_calls
                    # NOTE(review): has_rag_url is computed but never used in
                    # the pass/fail condition — confirm whether it should be.
                    has_rag_url = "rag_url" in service_calls

                    self.result.add_result(
                        "Unified Analysis (URL)",
                        has_ner_url and len(service_calls) > 0,
                        message,
                        data
                    )
                    return data
                else:
                    self.result.add_result(
                        "Unified Analysis (URL)",
                        False,
                        data.get("error", "URL analysis failed")
                    )
                    return None
            else:
                self.result.add_result(
                    "Unified Analysis (URL)",
                    False,
                    f"HTTP {response.status_code}: {response.text[:200]}"
                )
                return None
        except Exception as e:
            self.result.add_result(
                "Unified Analysis (URL)",
                False,
                str(e)
            )
            return None
429
+
430
    async def test_combined_search(self):
        """Test 5: Combined Search with NER Analysis.

        Queries POST /search/combined for content indexed by the earlier
        tests and verifies the RAG search service was invoked. Zero results
        are only a warning (indexing may lag). Returns the response payload
        on success, None on failure.
        """
        print("πŸ” Test 5: Combined Search with NER Analysis")

        # Wait a moment for indexing to complete
        await asyncio.sleep(2)

        try:
            request_data = {
                "query": "investigation murder case",
                "limit": 5,
                "similarity_threshold": 0.1, # Lower threshold for better results
                "include_ner_analysis": True,
                "ner_export_formats": ["json"]
            }

            response = await self.make_request('POST', f"{UNIFIED_URL}/search/combined", json=request_data)

            if response.status_code == 200:
                data = response.json()
                if data.get("success"):
                    service_calls = data.get("service_calls", [])
                    search_results = data.get("search_results", {})
                    results = search_results.get("results", [])
                    ner_analyses = search_results.get("ner_analyses", [])

                    message = f"Service calls: {', '.join(service_calls)}"
                    message += f"\n Search results: {len(results)}"
                    message += f"\n NER analyses: {len(ner_analyses)}"
                    message += f"\n Processing time: {data.get('processing_time', 0):.2f}s"

                    # Check for expected service calls
                    has_rag_search = "rag_search" in service_calls
                    # NOTE(review): has_ner_analysis is computed but unused in
                    # the success condition — confirm whether it should be.
                    has_ner_analysis = any("ner_text_" in call for call in service_calls)

                    success = has_rag_search and len(service_calls) > 0
                    if len(results) == 0:
                        self.result.add_warning(
                            "Combined Search",
                            "No search results found - may need more indexed content"
                        )

                    self.result.add_result(
                        "Combined Search",
                        success,
                        message,
                        data
                    )
                    return data
                else:
                    self.result.add_result(
                        "Combined Search",
                        False,
                        data.get("error", "Search failed")
                    )
                    return None
            else:
                self.result.add_result(
                    "Combined Search",
                    False,
                    f"HTTP {response.status_code}: {response.text[:200]}"
                )
                return None
        except Exception as e:
            self.result.add_result(
                "Combined Search",
                False,
                str(e)
            )
            return None
500
+
501
    async def test_service_proxies(self):
        """Test 6: Service Proxy Endpoints.

        Exercises the unified app's /ner, /ocr, and /rag proxy routes and
        records one aggregate result. The aggregate passes only when every
        proxy works, but the method returns True if at least one did.
        """
        print("πŸ” Test 6: Service Proxy Endpoints")

        proxy_tests = []  # (name, passed, message) per proxy

        # Test NER proxy
        try:
            ner_data = {
                "text": "Test entity recognition with John Smith working at Microsoft in Seattle.",
                "extract_relationships": True,
                "include_embeddings": False,
                "generate_graph_files": False
            }

            response = await self.make_request('POST', f"{UNIFIED_URL}/ner/analyze/text", json=ner_data)

            if response.status_code == 200:
                result = response.json()
                if result.get("success"):
                    entities = result.get("entities", [])
                    proxy_tests.append(("NER Proxy", True, f"Found {len(entities)} entities"))

                    # Track for cleanup
                    if result.get("analysis_id"):
                        self.created_analyses.append(result["analysis_id"])
                else:
                    proxy_tests.append(("NER Proxy", False, "Analysis failed"))
            else:
                proxy_tests.append(("NER Proxy", False, f"HTTP {response.status_code}"))
        except Exception as e:
            proxy_tests.append(("NER Proxy", False, str(e)))

        # Test OCR proxy
        try:
            response = await self.make_request('GET', f"{UNIFIED_URL}/ocr/health")

            if response.status_code == 200:
                proxy_tests.append(("OCR Proxy", True, "Health check passed"))
            else:
                proxy_tests.append(("OCR Proxy", False, f"HTTP {response.status_code}"))
        except Exception as e:
            proxy_tests.append(("OCR Proxy", False, str(e)))

        # Test RAG proxy
        try:
            response = await self.make_request('GET', f"{UNIFIED_URL}/rag/documents?limit=5")

            if response.status_code == 200:
                result = response.json()
                documents = result.get("documents", [])
                proxy_tests.append(("RAG Proxy", True, f"Found {len(documents)} documents"))
            else:
                proxy_tests.append(("RAG Proxy", False, f"HTTP {response.status_code}"))
        except Exception as e:
            proxy_tests.append(("RAG Proxy", False, str(e)))

        # Evaluate proxy tests
        passed_proxies = sum(1 for _, passed, _ in proxy_tests if passed)
        total_proxies = len(proxy_tests)

        for test_name, passed, message in proxy_tests:
            print(f" {'βœ…' if passed else '❌'} {test_name}: {message}")

        self.result.add_result(
            "Service Proxies",
            passed_proxies == total_proxies,
            f"Proxies working: {passed_proxies}/{total_proxies}",
            {"proxy_results": proxy_tests}
        )

        return passed_proxies > 0
573
+
574
    async def test_file_upload_unified(self):
        """Test 7: File Upload through Unified Interface.

        Uploads a synthetic plain-text report through the NER proxy's file
        endpoint and checks entities were extracted with an analysis_id.
        Returns the response payload on success, None on failure.
        """
        print("πŸ” Test 7: File Upload through Unified Interface")

        try:
            # Create test document — fixture text with known people,
            # organizations, dates, and money amounts for NER to find.
            test_content = """
        Technical Report: Advanced AI Systems

        This report examines the integration of Named Entity Recognition (NER),
        Optical Character Recognition (OCR), and Retrieval-Augmented Generation (RAG)
        systems in a unified architecture.

        Key Personnel:
        - Dr. Alice Johnson, Lead AI Researcher at TechCorp
        - Prof. Bob Smith, University of Technology
        - Sarah Wilson, Data Scientist

        Technical Components:
        - Azure OpenAI for embeddings and language processing
        - PostgreSQL with vector extensions for data storage
        - FastAPI for microservice architecture

        The system processes documents through multiple stages:
        1. OCR extraction for scanned documents
        2. NER analysis for entity and relationship extraction
        3. RAG indexing for searchable knowledge base

        Testing conducted on October 15, 2024 showed 95% accuracy.
        Total budget: $250,000 for the complete implementation.
        """

            # Test through NER proxy (file upload) — multipart form, so the
            # boolean options are sent as strings.
            file_content = test_content.encode('utf-8')
            files = {"file": ("test_report.txt", io.BytesIO(file_content), "text/plain")}
            data = {
                "extract_relationships": "true",
                "include_embeddings": "false",
                "include_summary": "true",
                "generate_graph_files": "true",
                "export_formats": "neo4j,json"
            }

            response = await self.make_request(
                'POST',
                f"{UNIFIED_URL}/ner/analyze/file",
                files=files,
                data=data
            )

            if response.status_code == 200:
                result = response.json()
                if result.get("success"):
                    entities = result.get("entities", [])
                    relationships = result.get("relationships", [])

                    # Track for cleanup
                    if result.get("analysis_id"):
                        self.created_analyses.append(result["analysis_id"])

                    message = f"File processed successfully"
                    message += f"\n Entities: {len(entities)}"
                    message += f"\n Relationships: {len(relationships)}"
                    message += f"\n Language: {result.get('language', 'unknown')}"

                    # Look for expected entities
                    person_entities = [e for e in entities if e.get('label') == 'PERSON']
                    org_entities = [e for e in entities if e.get('label') == 'ORGANIZATION']
                    money_entities = [e for e in entities if e.get('label') == 'MONEY']

                    message += f"\n People found: {len(person_entities)}"
                    message += f"\n Organizations found: {len(org_entities)}"
                    message += f"\n Money amounts found: {len(money_entities)}"

                    success = len(entities) > 0 and result.get("analysis_id")

                    self.result.add_result(
                        "File Upload (Unified)",
                        success,
                        message,
                        result
                    )
                    return result
                else:
                    self.result.add_result(
                        "File Upload (Unified)",
                        False,
                        result.get("error", "File analysis failed")
                    )
                    return None
            else:
                self.result.add_result(
                    "File Upload (Unified)",
                    False,
                    f"HTTP {response.status_code}: {response.text[:200]}"
                )
                return None
        except Exception as e:
            self.result.add_result(
                "File Upload (Unified)",
                False,
                str(e)
            )
            return None
678
+
679
+ async def test_service_discovery(self):
680
+ """Test 8: Service Discovery and Listing"""
681
+ print("πŸ” Test 8: Service Discovery and Listing")
682
+
683
+ try:
684
+ response = await self.make_request('GET', f"{UNIFIED_URL}/services")
685
+
686
+ if response.status_code == 200:
687
+ data = response.json()
688
+ services = data.get("services", {})
689
+ unified = data.get("unified", {})
690
+
691
+ expected_services = ["ner", "ocr", "rag"]
692
+ found_services = list(services.keys())
693
+
694
+ message = f"Services discovered: {', '.join(found_services)}"
695
+ message += f"\n Unified endpoint: {unified.get('url', 'N/A')}"
696
+
697
+ for service_name, service_info in services.items():
698
+ endpoints = service_info.get("endpoints", [])
699
+ message += f"\n {service_name}: {len(endpoints)} endpoints"
700
+
701
+ all_expected_found = all(service in found_services for service in expected_services)
702
+
703
+ self.result.add_result(
704
+ "Service Discovery",
705
+ all_expected_found,
706
+ message,
707
+ data
708
+ )
709
+ return data
710
+ else:
711
+ self.result.add_result(
712
+ "Service Discovery",
713
+ False,
714
+ f"HTTP {response.status_code}"
715
+ )
716
+ return None
717
+ except Exception as e:
718
+ self.result.add_result(
719
+ "Service Discovery",
720
+ False,
721
+ str(e)
722
+ )
723
+ return None
724
+
725
+ async def test_system_performance(self):
726
+ """Test 9: System Performance and Reliability"""
727
+ print("πŸ” Test 9: System Performance and Reliability")
728
+
729
+ try:
730
+ # Test multiple concurrent requests
731
+ tasks = []
732
+ test_texts = [
733
+ "Performance test with Apple Inc and CEO Tim Cook in California.",
734
+ "Reliability testing of Microsoft Azure services in Seattle.",
735
+ "Load testing with Google Cloud Platform and AI systems."
736
+ ]
737
+
738
+ start_time = time.time()
739
+
740
+ for i, text in enumerate(test_texts):
741
+ task = self.make_request(
742
+ 'POST',
743
+ f"{UNIFIED_URL}/ner/analyze/text",
744
+ json={
745
+ "text": text,
746
+ "extract_relationships": True,
747
+ "include_embeddings": False,
748
+ "generate_graph_files": False
749
+ }
750
+ )
751
+ tasks.append(task)
752
+
753
+ # Execute concurrent requests
754
+ responses = await asyncio.gather(*tasks, return_exceptions=True)
755
+ total_time = time.time() - start_time
756
+
757
+ # Analyze results
758
+ successful_requests = 0
759
+ total_entities = 0
760
+
761
+ for i, response in enumerate(responses):
762
+ if isinstance(response, Exception):
763
+ continue
764
+
765
+ if response.status_code == 200:
766
+ result = response.json()
767
+ if result.get("success"):
768
+ successful_requests += 1
769
+ entities = result.get("entities", [])
770
+ total_entities += len(entities)
771
+
772
+ # Track for cleanup
773
+ if result.get("analysis_id"):
774
+ self.created_analyses.append(result["analysis_id"])
775
+
776
+ avg_time_per_request = total_time / len(test_texts)
777
+
778
+ message = f"Concurrent requests: {successful_requests}/{len(test_texts)} successful"
779
+ message += f"\n Total time: {total_time:.2f}s"
780
+ message += f"\n Avg time per request: {avg_time_per_request:.2f}s"
781
+ message += f"\n Total entities found: {total_entities}"
782
+
783
+ # Performance criteria
784
+ performance_ok = (
785
+ successful_requests >= len(test_texts) * 0.8 and # 80% success rate
786
+ avg_time_per_request < 10.0 # Under 10 seconds per request
787
+ )
788
+
789
+ self.result.add_result(
790
+ "System Performance",
791
+ performance_ok,
792
+ message,
793
+ {
794
+ "successful_requests": successful_requests,
795
+ "total_requests": len(test_texts),
796
+ "total_time": total_time,
797
+ "avg_time_per_request": avg_time_per_request,
798
+ "total_entities": total_entities
799
+ }
800
+ )
801
+
802
+ return performance_ok
803
+
804
+ except Exception as e:
805
+ self.result.add_result(
806
+ "System Performance",
807
+ False,
808
+ str(e)
809
+ )
810
+ return False
811
+
812
+ async def test_error_handling(self):
813
+ """Test 10: Error Handling and Resilience"""
814
+ print("πŸ” Test 10: Error Handling and Resilience")
815
+
816
+ error_tests = []
817
+
818
+ # Test 1: Invalid unified analysis request
819
+ try:
820
+ response = await self.make_request(
821
+ 'POST',
822
+ f"{UNIFIED_URL}/analyze/unified",
823
+ json={"invalid": "data"}
824
+ )
825
+
826
+ if response.status_code in [400, 422]: # Expected validation error
827
+ error_tests.append(("Invalid Request Handling", True, "Properly rejected invalid data"))
828
+ else:
829
+ error_tests.append(("Invalid Request Handling", False, f"Unexpected status: {response.status_code}"))
830
+ except Exception as e:
831
+ error_tests.append(("Invalid Request Handling", False, str(e)))
832
+
833
+ # Test 2: Empty text analysis
834
+ try:
835
+ response = await self.make_request(
836
+ 'POST',
837
+ f"{UNIFIED_URL}/ner/analyze/text",
838
+ json={"text": "", "extract_relationships": True}
839
+ )
840
+
841
+ if response.status_code in [400, 422]: # Expected validation error
842
+ error_tests.append(("Empty Text Handling", True, "Properly rejected empty text"))
843
+ else:
844
+ result = response.json()
845
+ if not result.get("success"):
846
+ error_tests.append(("Empty Text Handling", True, "Failed gracefully"))
847
+ else:
848
+ error_tests.append(("Empty Text Handling", False, "Should have failed"))
849
+ except Exception as e:
850
+ error_tests.append(("Empty Text Handling", False, str(e)))
851
+
852
+ # Test 3: Invalid URL
853
+ try:
854
+ response = await self.make_request(
855
+ 'POST',
856
+ f"{UNIFIED_URL}/analyze/unified",
857
+ json={
858
+ "url": "https://invalid-url-that-does-not-exist-12345.com",
859
+ "extract_relationships": True
860
+ }
861
+ )
862
+
863
+ if response.status_code == 200:
864
+ result = response.json()
865
+ if not result.get("success"):
866
+ error_tests.append(("Invalid URL Handling", True, "Failed gracefully with invalid URL"))
867
+ else:
868
+ error_tests.append(("Invalid URL Handling", False, "Should have failed"))
869
+ else:
870
+ error_tests.append(("Invalid URL Handling", True, f"Rejected invalid URL (HTTP {response.status_code})"))
871
+ except Exception as e:
872
+ error_tests.append(("Invalid URL Handling", False, str(e)))
873
+
874
+ # Evaluate error handling tests
875
+ passed_error_tests = sum(1 for _, passed, _ in error_tests if passed)
876
+ total_error_tests = len(error_tests)
877
+
878
+ for test_name, passed, message in error_tests:
879
+ print(f" {'βœ…' if passed else '❌'} {test_name}: {message}")
880
+
881
+ self.result.add_result(
882
+ "Error Handling",
883
+ passed_error_tests >= total_error_tests * 0.8, # 80% success rate
884
+ f"Error tests passed: {passed_error_tests}/{total_error_tests}",
885
+ {"error_test_results": error_tests}
886
+ )
887
+
888
+ return passed_error_tests > 0
889
+
890
+ async def cleanup_test_data(self):
891
+ """Clean up test data"""
892
+ print("\n🧹 Cleaning up test data...")
893
+
894
+ cleanup_count = 0
895
+ cleanup_errors = 0
896
+
897
+ # Clean up NER analyses
898
+ for analysis_id in self.created_analyses:
899
+ try:
900
+ # Try direct service first
901
+ response = await self.make_request('DELETE', f"{NER_URL}/analysis/{analysis_id}")
902
+ if response.status_code in [200, 404]: # 404 is OK (already deleted)
903
+ cleanup_count += 1
904
+ else:
905
+ cleanup_errors += 1
906
+ except Exception as e:
907
+ cleanup_errors += 1
908
+ print(f" ⚠️ Failed to cleanup analysis {analysis_id[:8]}...: {e}")
909
+
910
+ # Clean up RAG documents
911
+ for document_id in self.created_documents:
912
+ try:
913
+ # Try through unified proxy
914
+ response = await self.make_request('DELETE', f"{UNIFIED_URL}/rag/documents/{document_id}")
915
+ if response.status_code in [200, 404]: # 404 is OK (already deleted)
916
+ cleanup_count += 1
917
+ else:
918
+ cleanup_errors += 1
919
+ except Exception as e:
920
+ cleanup_errors += 1
921
+ print(f" ⚠️ Failed to cleanup document {document_id[:8]}...: {e}")
922
+
923
+ if cleanup_count > 0:
924
+ print(f" βœ… Cleaned up {cleanup_count} test items")
925
+ if cleanup_errors > 0:
926
+ print(f" ⚠️ Failed to cleanup {cleanup_errors} items")
927
+
928
+ async def run_comprehensive_tests(self):
929
+ """Run all comprehensive unified system tests"""
930
+ print("πŸš€ Unified AI Services - Comprehensive Test Suite")
931
+ print("Testing: NER + OCR + RAG Integration with Unified Workflows")
932
+ print("=" * 80)
933
+
934
+ start_time = time.time()
935
+
936
+ # Test sequence
937
+ tests = [
938
+ ("Unified App Health", self.test_unified_app_health),
939
+ ("Individual Service Health", self.test_individual_service_health),
940
+ ("Unified Analysis (Text)", self.test_unified_analysis_text),
941
+ ("Unified Analysis (URL)", self.test_unified_analysis_url),
942
+ ("Combined Search", self.test_combined_search),
943
+ ("Service Proxies", self.test_service_proxies),
944
+ ("File Upload (Unified)", self.test_file_upload_unified),
945
+ ("Service Discovery", self.test_service_discovery),
946
+ ("System Performance", self.test_system_performance),
947
+ ("Error Handling", self.test_error_handling)
948
+ ]
949
+
950
+ for test_name, test_func in tests:
951
+ print(f"\n" + "=" * 80)
952
+ try:
953
+ await test_func()
954
+ except Exception as e:
955
+ print(f"❌ {test_name} failed with exception: {e}")
956
+ self.result.add_result(test_name, False, f"Exception: {e}")
957
+
958
+ # Cleanup
959
+ print(f"\n" + "=" * 80)
960
+ await self.cleanup_test_data()
961
+
962
+ # Final summary
963
+ total_time = time.time() - start_time
964
+ print(f"\n" + "=" * 80)
965
+ print("πŸ“Š UNIFIED SYSTEM COMPREHENSIVE TEST RESULTS")
966
+ print("=" * 80)
967
+
968
+ self.result.print_summary()
969
+
970
+ print(f"\nTEST EXECUTION:")
971
+ print(f"Total Time: {total_time:.2f} seconds")
972
+ print(f"Tests Created: NER analyses: {len(self.created_analyses)}, RAG documents: {len(self.created_documents)}")
973
+
974
+ passed = self.result.passed_tests
975
+ total = self.result.total_tests
976
+
977
+ if passed == total:
978
+ print(f"\nπŸŽ‰ ALL UNIFIED SYSTEM TESTS PASSED!")
979
+ print(f"βœ… Unified application is fully operational")
980
+ print(f"βœ… All services are integrated and working")
981
+ print(f"βœ… Combined workflows are functional")
982
+ print(f"βœ… Service proxies are working")
983
+ print(f"βœ… Error handling is robust")
984
+
985
+ print(f"\n🎯 UNIFIED SYSTEM CAPABILITIES VERIFIED:")
986
+ print(f" β€’ NER + OCR + RAG service integration")
987
+ print(f" β€’ Unified analysis workflows")
988
+ print(f" β€’ Combined search with NER enhancement")
989
+ print(f" β€’ Service proxy functionality")
990
+ print(f" β€’ Multi-language support")
991
+ print(f" β€’ Concurrent request handling")
992
+ print(f" β€’ Comprehensive error handling")
993
+ print(f" β€’ Real-time service health monitoring")
994
+
995
+ else:
996
+ print(f"\n⚠️ SOME UNIFIED SYSTEM TESTS FAILED")
997
+ print(f"❌ {self.result.failed_tests} out of {total} tests failed")
998
+
999
+ print(f"\nπŸ”§ TROUBLESHOOTING STEPS:")
1000
+ print(f"1. Check that all services are running:")
1001
+ print(f" β€’ NER Service: {NER_URL}/health")
1002
+ print(f" β€’ OCR Service: {OCR_URL}/health")
1003
+ print(f" β€’ RAG Service: {RAG_URL}/health")
1004
+ print(f" β€’ Unified App: {UNIFIED_URL}/health")
1005
+ print(f"2. Verify configuration in .env file")
1006
+ print(f"3. Check service logs for errors")
1007
+ print(f"4. Ensure all dependencies are installed")
1008
+ print(f"5. Verify database connectivity")
1009
+
1010
+ return passed == total
1011
+
1012
async def main():
    """Main test runner.

    Accepts the unified app URL as an optional first CLI argument,
    rebinds the module-level UNIFIED_URL accordingly, then runs the
    full suite and exits 0 on success, 1 on failure.
    """
    # FIX: the global declaration must precede any use of the name in
    # this scope. The original read UNIFIED_URL first and declared
    # `global UNIFIED_URL` afterwards, which is a SyntaxError
    # ("name is used prior to global declaration") and prevented the
    # module from even importing.
    global UNIFIED_URL

    if len(sys.argv) > 1:
        UNIFIED_URL = sys.argv[1]

    print("🧪 Unified AI Services - Comprehensive Test Suite")
    print(f"📡 Testing unified system at: {UNIFIED_URL}")
    print("🔗 Expected services:")
    print(f"   • NER Service: {NER_URL}")
    print(f"   • OCR Service: {OCR_URL}")
    print(f"   • RAG Service: {RAG_URL}")
    print(f"   • Unified App: {UNIFIED_URL}")

    print("\nMake sure the unified application is running before starting tests.")
    print("Start command: python app.py")

    # Wait for user confirmation before hammering the services.
    input("\nPress Enter to start unified system tests...")

    async with UnifiedSystemTester() as tester:
        success = await tester.run_comprehensive_tests()

        if success:
            print("\n🏆 UNIFIED SYSTEM VERIFICATION COMPLETE!")
            print("✅ All services are integrated and operational")
            print("✅ Combined workflows are working perfectly")
            print("✅ Ready for production deployment")

            sys.exit(0)
        else:
            print("\n🔧 UNIFIED SYSTEM NEEDS ATTENTION")
            print("❌ Some functionality is not working correctly")
            print("📋 Review the test results above for specific issues")

            sys.exit(1)
1053
+
1054
if __name__ == "__main__":
    # Script entry point: drive the async test runner to completion.
    asyncio.run(main())