Upload folder using huggingface_hub
- .gitignore +8 -0
- Dockerfile +41 -0
- README.md +90 -10
- alembic.ini +40 -0
- alembic/env.py +66 -0
- alembic/script.py.mako +1 -0
- alembic/versions/001_initial_schema.py +95 -0
- app.py +23 -0
- app/__init__.py +5 -0
- app/application/__init__.py +1 -0
- app/application/dto/__init__.py +79 -0
- app/application/services/__init__.py +5 -0
- app/application/services/chunking_service.py +97 -0
- app/application/use_cases/__init__.py +1 -0
- app/application/use_cases/document_indexing.py +129 -0
- app/application/use_cases/query_processing.py +136 -0
- app/core/__init__.py +1 -0
- app/core/config.py +121 -0
- app/core/logging.py +75 -0
- app/core/metrics.py +98 -0
- app/domain/__init__.py +1 -0
- app/domain/entities/__init__.py +15 -0
- app/domain/entities/document.py +87 -0
- app/domain/entities/query.py +113 -0
- app/domain/interfaces/__init__.py +20 -0
- app/domain/interfaces/cache.py +36 -0
- app/domain/interfaces/llm.py +72 -0
- app/domain/interfaces/repository.py +60 -0
- app/domain/interfaces/retriever.py +75 -0
- app/infrastructure/__init__.py +1 -0
- app/infrastructure/cache/__init__.py +1 -0
- app/infrastructure/cache/redis_cache.py +84 -0
- app/infrastructure/database/__init__.py +1 -0
- app/infrastructure/database/models.py +82 -0
- app/infrastructure/external/__init__.py +1 -0
- app/infrastructure/external/embedder.py +31 -0
- app/infrastructure/external/gemini_llm.py +87 -0
- app/infrastructure/external/prompt_builder.py +54 -0
- app/infrastructure/external/qdrant_retriever.py +124 -0
- app/infrastructure/external/simple_reranker.py +20 -0
- app/infrastructure/repositories/__init__.py +1 -0
- app/infrastructure/repositories/postgres_repository.py +178 -0
- app/main.py +84 -0
- app/presentation/__init__.py +1 -0
- app/presentation/api/__init__.py +1 -0
- app/presentation/api/v1/__init__.py +1 -0
- app/presentation/api/v1/endpoints.py +168 -0
- app/presentation/api/v1/schemas.py +82 -0
- requirements.txt +19 -0
.gitignore
ADDED
@@ -0,0 +1,8 @@
+__pycache__/
+*.py[cod]
+.env
+.env.local
+*.log
+.pytest_cache/
+.coverage
+htmlcov/
Dockerfile
ADDED
@@ -0,0 +1,41 @@
+# HuggingFace Space Dockerfile
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements
+COPY requirements.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application code
+COPY app ./app
+COPY app.py .
+COPY alembic ./alembic
+COPY alembic.ini .
+
+# Create non-root user
+RUN useradd -m -u 1000 user && chown -R user:user /app
+USER user
+
+# HuggingFace Spaces uses port 7860
+EXPOSE 7860
+
+# Set environment
+ENV PYTHONUNBUFFERED=1
+ENV PORT=7860
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+
+# Run application
+CMD ["python", "app.py"]
README.md
CHANGED
@@ -1,10 +1,90 @@
----
-title:
-emoji:
-colorFrom:
-colorTo:
-sdk: docker
-pinned: false
-
-
-
+---
+title: RAG Onboarding Backend
+emoji: 🚀
+colorFrom: blue
+colorTo: green
+sdk: docker
+pinned: false
+license: mit
+---
+
+# RAG Onboarding Backend - HuggingFace Space
+
+Production-ready RAG (Retrieval-Augmented Generation) backend for corporate employee onboarding, deployed on HuggingFace Spaces.
+
+## 🌟 Features
+
+- **FastAPI REST API** - High-performance async API
+- **Gemini + HuggingFace Models** - Gemini LLM via API, open-source sentence-transformer embeddings
+- **Vector Search** - Qdrant for similarity search
+- **Caching** - Redis for performance optimization
+- **Monitoring** - Prometheus metrics
+- **Clean Architecture** - Production-grade code structure
+
+## 🚀 Quick Start
+
+### API Endpoints
+
+- `GET /` - Service info
+- `GET /health` - Health check
+- `POST /api/v1/query` - RAG query processing
+- `GET /api/v1/metrics` - Prometheus metrics
+- `GET /docs` - Interactive API documentation
+
+### Example Query
+
+```bash
+curl -X POST "https://YOUR-SPACE-NAME.hf.space/api/v1/query" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query_text": "What is the onboarding process?",
+    "department": "HR",
+    "top_k": 5
+  }'
+```
+
+## 🔧 Configuration
+
+Set the following secrets in your HuggingFace Space settings:
+
+- `GEMINI_API_KEY` - Your Google Gemini API key
+- `DATABASE_URL` - PostgreSQL connection string (use external DB like Supabase/Neon)
+- `REDIS_URL` - Redis connection string (use Upstash Redis)
+- `QDRANT_URL` - Qdrant vector DB URL (use Qdrant Cloud)
+- `QDRANT_API_KEY` - Qdrant API key (if using cloud)
+
+## 📊 Models Used
+
+- **LLM**: Google Gemini 2.0 Flash (via API)
+- **Embeddings**: `sentence-transformers/all-MiniLM-L6-v2`
+- **Reranking**: `cross-encoder/ms-marco-MiniLM-L-12-v2` (optional)
+
+## 🏗️ Architecture
+
+```
+┌─────────────┐
+│   Client    │
+└──────┬──────┘
+       │
+       ▼
+┌─────────────┐
+│   FastAPI   │
+└──────┬──────┘
+       │
+       ├─────► PostgreSQL (Documents)
+       ├─────► Redis (Cache)
+       ├─────► Qdrant (Vectors)
+       └─────► Google Gemini (LLM)
+```
+
+## 📝 License
+
+MIT License - See LICENSE file for details
+
+## 🤝 Contributing
+
+Contributions welcome! Please open an issue or submit a PR.
+
+## 📧 Support
+
+For questions or issues, please open a GitHub issue.
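For programmatic access, a minimal Python client sketch against the same endpoint (the Space hostname is a placeholder; the response fields assume the shape of `QueryResponseDTO` in this commit):

```python
# Minimal client sketch for the query endpoint above; "YOUR-SPACE-NAME" is a
# placeholder and the response fields mirror QueryResponseDTO in this repo.
import requests

BASE_URL = "https://YOUR-SPACE-NAME.hf.space"

payload = {
    "query_text": "What is the onboarding process?",
    "department": "HR",
    "top_k": 5,
}
resp = requests.post(f"{BASE_URL}/api/v1/query", json=payload, timeout=60)
resp.raise_for_status()
data = resp.json()

print(data["answer"])
for source in data.get("sources", []):
    print(f"- {source['title']} (score: {source['relevance_score']:.2f})")
```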
alembic.ini
ADDED
@@ -0,0 +1,40 @@
+[alembic]
+script_location = alembic
+prepend_sys_path = .
+version_path_separator = os
+
+sqlalchemy.url = postgresql+asyncpg://postgres:postgres@localhost:5432/rag_onboarding
+
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
alembic/env.py
ADDED
@@ -0,0 +1,66 @@
+"""Alembic environment configuration"""
+import asyncio
+from logging.config import fileConfig
+
+from alembic import context
+from sqlalchemy import pool
+from sqlalchemy.engine import Connection
+from sqlalchemy.ext.asyncio import async_engine_from_config
+
+from app.infrastructure.database.models import Base
+
+# Alembic Config object
+config = context.config
+
+# Interpret the config file for Python logging
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# Metadata for autogenerate
+target_metadata = Base.metadata
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode."""
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def do_run_migrations(connection: Connection) -> None:
+    context.configure(connection=connection, target_metadata=target_metadata)
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+async def run_async_migrations() -> None:
+    """Run migrations in 'online' mode."""
+    connectable = async_engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    async with connectable.connect() as connection:
+        await connection.run_sync(do_run_migrations)
+
+    await connectable.dispose()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode."""
+    asyncio.run(run_async_migrations())
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
alembic/script.py.mako
ADDED
@@ -0,0 +1 @@
+"""Script configuration"""
alembic/versions/001_initial_schema.py
ADDED
@@ -0,0 +1,95 @@
+"""
+Database Migration - Initial Schema
+
+Create tables for documents, chunks, and queries.
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers
+revision = '001'
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Create documents table
+    op.create_table(
+        'documents',
+        sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column('title', sa.String(500), nullable=False),
+        sa.Column('filename', sa.String(255), nullable=False),
+        sa.Column('file_type', sa.String(50), nullable=False),
+        sa.Column('file_size', sa.BigInteger(), nullable=False),
+        sa.Column('storage_path', sa.String(1000), nullable=False),
+        sa.Column('department', sa.String(100), nullable=False),
+        sa.Column('status', sa.String(50), nullable=False, server_default='pending'),
+        sa.Column('upload_session_id', sa.String(100), nullable=True),
+        sa.Column('uploaded_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
+        sa.Column('indexed_at', sa.DateTime(timezone=True), nullable=True),
+        sa.Column('metadata', postgresql.JSONB(), nullable=False, server_default='{}'),
+        sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
+        sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
+    )
+
+    # Create indexes for documents
+    op.create_index('ix_documents_title', 'documents', ['title'])
+    op.create_index('ix_documents_file_type', 'documents', ['file_type'])
+    op.create_index('ix_documents_department', 'documents', ['department'])
+    op.create_index('ix_documents_status', 'documents', ['status'])
+    op.create_index('ix_documents_department_status', 'documents', ['department', 'status'])
+    op.create_index('ix_documents_created_at', 'documents', ['created_at'])
+
+    # Create document_chunks table
+    op.create_table(
+        'document_chunks',
+        sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column('document_id', postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column('chunk_index', sa.Integer(), nullable=False),
+        sa.Column('content', sa.Text(), nullable=False),
+        sa.Column('token_count', sa.Integer(), nullable=False),
+        sa.Column('vector_id', sa.String(100), nullable=True),
+        sa.Column('metadata', postgresql.JSONB(), nullable=False, server_default='{}'),
+        sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
+    )
+
+    # Create indexes for chunks
+    op.create_index('ix_chunks_document_id', 'document_chunks', ['document_id'])
+    op.create_index('ix_chunks_vector_id', 'document_chunks', ['vector_id'])
+    op.create_index('ix_chunks_document_id_index', 'document_chunks', ['document_id', 'chunk_index'])
+
+    # Create queries table
+    op.create_table(
+        'queries',
+        sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column('query_text', sa.Text(), nullable=False),
+        sa.Column('department', sa.String(100), nullable=False),
+        sa.Column('user_id', sa.String(100), nullable=True),
+        sa.Column('session_id', sa.String(100), nullable=True),
+        sa.Column('status', sa.String(50), nullable=False, server_default='pending'),
+        sa.Column('answer', sa.Text(), nullable=True),
+        sa.Column('sources', postgresql.JSONB(), nullable=False, server_default='[]'),
+        sa.Column('confidence', sa.Integer(), nullable=False, server_default='0'),
+        sa.Column('duration_ms', sa.Integer(), nullable=False, server_default='0'),
+        sa.Column('tokens_used', sa.Integer(), nullable=False, server_default='0'),
+        sa.Column('model', sa.String(100), nullable=True),
+        sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
+        sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
+    )
+
+    # Create indexes for queries
+    op.create_index('ix_queries_department', 'queries', ['department'])
+    op.create_index('ix_queries_user_id', 'queries', ['user_id'])
+    op.create_index('ix_queries_session_id', 'queries', ['session_id'])
+    op.create_index('ix_queries_status', 'queries', ['status'])
+    op.create_index('ix_queries_created_at', 'queries', ['created_at'])
+    op.create_index('ix_queries_department_created', 'queries', ['department', 'created_at'])
+    op.create_index('ix_queries_user_created', 'queries', ['user_id', 'created_at'])
+
+
+def downgrade() -> None:
+    op.drop_table('queries')
+    op.drop_table('document_chunks')
+    op.drop_table('documents')
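The migration is applied with `alembic upgrade head`; it can also be driven programmatically. A minimal sketch using Alembic's command API and the `alembic.ini` above (assumes the configured database is reachable):

```python
# Programmatic equivalent of `alembic upgrade head`, driven by the
# alembic.ini in this repo; assumes the configured database is reachable.
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
command.upgrade(cfg, "head")  # applies 001_initial_schema
```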
app.py
ADDED
@@ -0,0 +1,23 @@
+"""
+HuggingFace Space Entry Point
+Runs the FastAPI app for HuggingFace Spaces deployment
+"""
+import os
+import sys
+
+# Add app to path
+sys.path.insert(0, os.path.dirname(__file__))
+
+from app.main import app
+
+# For HF Spaces
+if __name__ == "__main__":
+    import uvicorn
+
+    port = int(os.getenv("PORT", 7860))  # HF Spaces default port
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=port,
+        log_level="info"
+    )
app/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""
+RAG Onboarding Backend - Production-ready RAG system for employee onboarding
+"""
+
+__version__ = "1.0.0"
app/application/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Application layer"""
app/application/dto/__init__.py
ADDED
@@ -0,0 +1,79 @@
+"""
+Application Layer - DTOs (Data Transfer Objects)
+"""
+from dataclasses import dataclass
+from typing import List, Optional
+from uuid import UUID
+
+
+@dataclass
+class QueryDTO:
+    """Query data transfer object"""
+
+    query_text: str
+    department: str
+    user_id: Optional[str] = None
+    session_id: Optional[str] = None
+    top_k: int = 10
+    temperature: float = 0.7
+    max_tokens: int = 2048
+    filters: Optional[dict] = None
+
+    def __post_init__(self) -> None:
+        if self.filters is None:
+            self.filters = {}
+
+
+@dataclass
+class SourceDTO:
+    """Source citation DTO"""
+
+    title: str
+    content: str
+    relevance_score: float
+    document_id: str
+    chunk_index: int
+    metadata: dict
+
+
+@dataclass
+class QueryResponseDTO:
+    """Query response DTO"""
+
+    query_id: str
+    answer: str
+    sources: List[SourceDTO]
+    confidence: float
+    processing_time_ms: int
+    tokens_used: int
+    model: str
+
+
+@dataclass
+class DocumentUploadDTO:
+    """Document upload DTO"""
+
+    filename: str
+    content: bytes
+    department: str
+    metadata: Optional[dict] = None
+
+    def __post_init__(self) -> None:
+        if self.metadata is None:
+            self.metadata = {}
+
+
+@dataclass
+class DocumentDTO:
+    """Document DTO"""
+
+    id: str
+    title: str
+    filename: str
+    file_type: str
+    file_size: int
+    department: str
+    status: str
+    uploaded_at: str
+    indexed_at: Optional[str] = None
+    metadata: Optional[dict] = None
app/application/services/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""Application services"""
+
+from app.application.services.chunking_service import ChunkingService
+
+__all__ = ["ChunkingService"]
app/application/services/chunking_service.py
ADDED
@@ -0,0 +1,97 @@
+"""
+Application Layer - Chunking Service
+
+Handles intelligent document chunking.
+"""
+import re
+from typing import List, Optional
+from uuid import UUID
+
+from app.domain.entities import DocumentChunk
+
+
+class ChunkingService:
+    """Service for chunking documents intelligently"""
+
+    def __init__(
+        self,
+        chunk_size: int = 800,
+        chunk_overlap: int = 100,
+        min_chunk_size: int = 100,
+    ):
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+        self.min_chunk_size = min_chunk_size
+
+    async def chunk_text(
+        self, text: str, document_id: UUID, metadata: Optional[dict] = None
+    ) -> List[DocumentChunk]:
+        """Chunk text using semantic boundaries"""
+        if metadata is None:
+            metadata = {}
+
+        # 1. Split by paragraphs
+        paragraphs = self._split_paragraphs(text)
+
+        # 2. Combine into chunks
+        chunks = []
+        current_chunk = []
+        current_size = 0
+
+        for para in paragraphs:
+            para_tokens = self._count_tokens(para)
+
+            if current_size + para_tokens > self.chunk_size and current_chunk:
+                # Flush current chunk
+                chunk_text = "\n\n".join(current_chunk)
+                chunks.append(chunk_text)
+
+                # Start new chunk with overlap
+                overlap_text = self._get_overlap(current_chunk)
+                current_chunk = [overlap_text, para] if overlap_text else [para]
+                current_size = self._count_tokens("\n\n".join(current_chunk))
+            else:
+                current_chunk.append(para)
+                current_size += para_tokens
+
+        # Flush remaining
+        if current_chunk:
+            chunks.append("\n\n".join(current_chunk))
+
+        # 3. Create DocumentChunk entities
+        return [
+            DocumentChunk(
+                document_id=document_id,
+                chunk_index=idx,
+                content=chunk,
+                token_count=self._count_tokens(chunk),
+                metadata=metadata,
+            )
+            for idx, chunk in enumerate(chunks)
+            if self._count_tokens(chunk) >= self.min_chunk_size
+        ]
+
+    def _split_paragraphs(self, text: str) -> List[str]:
+        """Split text into paragraphs"""
+        # Split by double newlines, headers, etc.
+        paragraphs = re.split(r"\n\s*\n", text)
+        return [p.strip() for p in paragraphs if p.strip()]
+
+    def _count_tokens(self, text: str) -> int:
+        """Approximate token count (1 token ≈ 4 chars)"""
+        return len(text) // 4
+
+    def _get_overlap(self, chunks: List[str]) -> str:
+        """Get overlap text from previous chunks"""
+        if not chunks:
+            return ""
+
+        # Take last chunk and truncate to overlap size
+        last_chunk = chunks[-1]
+        tokens = last_chunk.split()
+        overlap_tokens = int(self.chunk_overlap * 0.25)  # Rough token estimate
+
+        if len(tokens) <= overlap_tokens:
+            return last_chunk
+
+        return " ".join(tokens[-overlap_tokens:])
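A minimal usage sketch for `ChunkingService` (parameters shrunk so the short demo text survives the `min_chunk_size` filter):

```python
import asyncio
from uuid import uuid4

from app.application.services import ChunkingService


async def demo() -> None:
    # Small limits for the demo; min_chunk_size=1 keeps short chunks
    service = ChunkingService(chunk_size=50, chunk_overlap=10, min_chunk_size=1)
    text = "First paragraph about onboarding.\n\nSecond paragraph about benefits."
    chunks = await service.chunk_text(text, document_id=uuid4())
    for chunk in chunks:
        print(chunk.chunk_index, chunk.token_count, chunk.content[:40])


asyncio.run(demo())
```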
app/application/use_cases/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Use cases"""
app/application/use_cases/document_indexing.py
ADDED
@@ -0,0 +1,129 @@
+"""
+Application Layer - Document Indexing Use Case
+
+Handles document upload and indexing into the knowledge base.
+"""
+import hashlib
+from pathlib import Path
+from typing import List
+from uuid import uuid4
+
+from app.application.dto import DocumentDTO, DocumentUploadDTO
+from app.domain.entities import Document, DocumentChunk, DocumentStatus, DocumentType
+from app.domain.interfaces import IChunkRepository, IDocumentRepository, IEmbedder
+
+
+class DocumentIndexingUseCase:
+    """Use case for indexing documents into the knowledge base"""
+
+    def __init__(
+        self,
+        document_repository: IDocumentRepository,
+        chunk_repository: IChunkRepository,
+        embedder: IEmbedder,
+        chunking_service: "ChunkingService",
+    ):
+        self.document_repository = document_repository
+        self.chunk_repository = chunk_repository
+        self.embedder = embedder
+        self.chunking_service = chunking_service
+
+    async def execute(self, upload_dto: DocumentUploadDTO) -> DocumentDTO:
+        """Execute document indexing pipeline"""
+
+        # 1. Detect file type
+        file_type = self._detect_file_type(upload_dto.filename)
+
+        # 2. Create document entity
+        document = Document(
+            title=self._extract_title(upload_dto.filename),
+            filename=upload_dto.filename,
+            file_type=file_type,
+            file_size=len(upload_dto.content),
+            storage_path=self._generate_storage_path(upload_dto.filename),
+            department=upload_dto.department,
+            metadata=upload_dto.metadata,
+        )
+
+        # 3. Save document to repository
+        saved_document = await self.document_repository.create(document)
+
+        # 4. Mark as processing
+        saved_document.mark_as_processing()
+        await self.document_repository.update(saved_document)
+
+        try:
+            # 5. Extract text content
+            text_content = await self._extract_text(upload_dto.content, file_type)
+
+            # 6. Chunk the document
+            chunks_data = await self.chunking_service.chunk_text(
+                text=text_content, document_id=saved_document.id, metadata=upload_dto.metadata
+            )
+
+            # 7. Generate embeddings
+            texts = [chunk.content for chunk in chunks_data]
+            embeddings = await self.embedder.embed_texts(texts)
+
+            # 8. Store chunks with embeddings
+            # (Vector storage will be handled in infrastructure layer)
+            chunks = await self.chunk_repository.create_bulk(chunks_data)
+
+            # 9. Mark document as indexed
+            saved_document.mark_as_indexed()
+            await self.document_repository.update(saved_document)
+
+            # 10. Return DTO
+            return self._to_dto(saved_document)
+
+        except Exception:
+            # Mark as failed
+            saved_document.mark_as_failed()
+            await self.document_repository.update(saved_document)
+            raise
+
+    def _detect_file_type(self, filename: str) -> DocumentType:
+        """Detect file type from filename"""
+        suffix = Path(filename).suffix.lower()
+        type_map = {
+            ".pdf": DocumentType.PDF,
+            ".docx": DocumentType.DOCX,
+            ".txt": DocumentType.TXT,
+            ".md": DocumentType.MD,
+            ".html": DocumentType.HTML,
+        }
+        return type_map.get(suffix, DocumentType.TXT)
+
+    def _extract_title(self, filename: str) -> str:
+        """Extract title from filename"""
+        return Path(filename).stem.replace("_", " ").replace("-", " ").title()
+
+    def _generate_storage_path(self, filename: str) -> str:
+        """Generate unique storage path"""
+        file_hash = hashlib.md5(f"{uuid4()}{filename}".encode()).hexdigest()
+        return f"documents/{file_hash[:2]}/{file_hash}/{filename}"
+
+    async def _extract_text(self, content: bytes, file_type: DocumentType) -> str:
+        """Extract text from document content"""
+        # Simplified - in production use proper libraries
+        # (PyPDF2, python-docx, BeautifulSoup, etc.)
+        if file_type == DocumentType.TXT or file_type == DocumentType.MD:
+            return content.decode("utf-8")
+        else:
+            # Placeholder - implement proper extraction
+            return content.decode("utf-8", errors="ignore")
+
+    def _to_dto(self, document: Document) -> DocumentDTO:
+        """Convert Document entity to DTO"""
+        return DocumentDTO(
+            id=str(document.id),
+            title=document.title,
+            filename=document.filename,
+            file_type=document.file_type.value,
+            file_size=document.file_size,
+            department=document.department,
+            status=document.status.value,
+            uploaded_at=document.uploaded_at.isoformat(),
+            indexed_at=document.indexed_at.isoformat() if document.indexed_at else None,
+            metadata=document.metadata,
+        )
app/application/use_cases/query_processing.py
ADDED
@@ -0,0 +1,136 @@
+"""
+Application Layer - Query Processing Use Case
+
+Orchestrates the RAG pipeline for answering user queries.
+"""
+import time
+from typing import List
+
+from app.application.dto import QueryDTO, QueryResponseDTO, SourceDTO
+from app.domain.entities import Query, QueryRequest, Source
+from app.domain.interfaces import ILLM, ICache, IPromptBuilder, IReranker, IRetriever
+
+
+class QueryProcessingUseCase:
+    """Use case for processing user queries through RAG pipeline"""
+
+    def __init__(
+        self,
+        retriever: IRetriever,
+        reranker: IReranker,
+        llm: ILLM,
+        prompt_builder: IPromptBuilder,
+        cache: ICache,
+    ):
+        self.retriever = retriever
+        self.reranker = reranker
+        self.llm = llm
+        self.prompt_builder = prompt_builder
+        self.cache = cache
+
+    async def execute(self, query_dto: QueryDTO) -> QueryResponseDTO:
+        """Execute query processing pipeline"""
+        start_time = time.time()
+
+        # 1. Create query request
+        query_request = QueryRequest(
+            query_text=query_dto.query_text,
+            department=query_dto.department,
+            user_id=query_dto.user_id,
+            session_id=query_dto.session_id,
+            top_k=query_dto.top_k,
+            temperature=query_dto.temperature,
+            max_tokens=query_dto.max_tokens,
+            filters=query_dto.filters,
+        )
+
+        # 2. Check semantic cache
+        cache_key = f"query:{hash(query_dto.query_text)}:{query_dto.department}"  # NOTE: hash() is salted per process; see the stable-key sketch below
+        cached_response = await self.cache.get(cache_key)
+        if cached_response:
+            return cached_response
+
+        # 3. Retrieve relevant documents
+        filters = {"department": query_dto.department}
+        if query_dto.filters:
+            filters.update(query_dto.filters)
+
+        retrieval_results = await self.retriever.hybrid_search(
+            query=query_dto.query_text,
+            top_k=100,  # Initial retrieval
+            alpha=0.5,
+            filters=filters,
+        )
+
+        # 4. Rerank results
+        reranked_results = await self.reranker.rerank(
+            query=query_dto.query_text, results=retrieval_results, top_k=query_dto.top_k
+        )
+
+        # 5. Build context
+        context = [result.content for result in reranked_results]
+
+        # 6. Build prompt
+        messages = self.prompt_builder.build_rag_prompt(
+            query=query_dto.query_text,
+            context=context,
+            system_prompt=self._get_system_prompt(query_dto.department),
+        )
+
+        # 7. Generate answer
+        llm_response = await self.llm.generate(
+            messages=messages,
+            temperature=query_dto.temperature,
+            max_tokens=query_dto.max_tokens,
+        )
+
+        # 8. Create sources
+        sources = [
+            SourceDTO(
+                title=f"Document {result.document_id}",
+                content=result.content[:500],  # Truncate for response
+                relevance_score=result.score,
+                document_id=result.document_id,
+                chunk_index=result.chunk_index,
+                metadata=result.metadata,
+            )
+            for result in reranked_results
+        ]
+
+        # 9. Calculate metrics
+        processing_time_ms = int((time.time() - start_time) * 1000)
+
+        # 10. Build response
+        response = QueryResponseDTO(
+            query_id=str(query_request.id) if hasattr(query_request, "id") else "temp",
+            answer=llm_response.content,
+            sources=sources,
+            confidence=self._calculate_confidence(reranked_results),
+            processing_time_ms=processing_time_ms,
+            tokens_used=llm_response.tokens_used,
+            model=llm_response.model,
+        )
+
+        # 11. Cache response
+        await self.cache.set(cache_key, response, ttl=3600)
+
+        return response
+
+    def _get_system_prompt(self, department: str) -> str:
+        """Get department-specific system prompt"""
+        prompts = {
+            "HR": "You are a helpful HR assistant for employee onboarding. Provide clear, accurate information about HR policies, benefits, and procedures.",
+            "IT": "You are an IT support assistant for new employees. Help with technical setup, access, and IT policies.",
+            "Legal": "You are a legal compliance assistant. Provide information about legal policies, regulations, and compliance requirements.",
+            "Finance": "You are a finance assistant. Help with expense policies, financial procedures, and budget information.",
+            "General": "You are a helpful corporate onboarding assistant. Provide accurate information to help new employees integrate successfully.",
+        }
+        return prompts.get(department, prompts["General"])
+
+    def _calculate_confidence(self, results: List) -> float:
+        """Calculate confidence score based on retrieval results"""
+        if not results:
+            return 0.0
+        # Average of top 3 scores
+        top_scores = [r.score for r in results[:3]]
+        return sum(top_scores) / len(top_scores) if top_scores else 0.0
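As the inline note above says, Python's built-in `hash()` is salted per process, so the step-2 cache key will not match across workers or restarts. A stable alternative, sketched with `hashlib`:

```python
import hashlib


def stable_cache_key(query_text: str, department: str) -> str:
    """Process-independent cache key (illustrative replacement for hash())."""
    digest = hashlib.sha256(query_text.strip().lower().encode("utf-8")).hexdigest()
    return f"query:{digest[:16]}:{department}"
```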
app/core/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Core utilities"""
app/core/config.py
ADDED
@@ -0,0 +1,121 @@
+"""
+Core - Configuration Management
+
+Handles application configuration using Pydantic Settings.
+"""
+from functools import lru_cache
+from typing import List
+
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class Settings(BaseSettings):
+    """Application settings"""
+
+    model_config = SettingsConfigDict(env_file=".env", case_sensitive=False)
+
+    # Application
+    app_name: str = Field(default="RAG Onboarding Backend")
+    app_version: str = Field(default="1.0.0")
+    environment: str = Field(default="development")
+    debug: bool = Field(default=True)
+    log_level: str = Field(default="INFO")
+
+    # Server
+    host: str = Field(default="0.0.0.0")
+    port: int = Field(default=8000)
+    workers: int = Field(default=4)
+
+    # Database
+    database_url: str = Field(
+        default="postgresql+asyncpg://postgres:postgres@localhost:5432/rag_onboarding"
+    )
+    database_pool_size: int = Field(default=20)
+    database_max_overflow: int = Field(default=0)
+
+    # Redis
+    redis_url: str = Field(default="redis://localhost:6379/0")
+    redis_cache_ttl: int = Field(default=3600)
+
+    # Qdrant
+    qdrant_url: str = Field(default="http://localhost:6333")
+    qdrant_api_key: str = Field(default="")
+    qdrant_collection_name: str = Field(default="onboarding_documents")
+    qdrant_vector_size: int = Field(default=384)
+
+    # RabbitMQ / Celery
+    rabbitmq_url: str = Field(default="amqp://guest:guest@localhost:5672/")
+    celery_broker_url: str = Field(default="redis://localhost:6379/1")
+    celery_result_backend: str = Field(default="redis://localhost:6379/2")
+
+    # Gemini
+    gemini_api_key: str = Field(default="")
+    gemini_model: str = Field(default="gemini-2.0-flash")
+    gemini_temperature: float = Field(default=0.7)
+    gemini_max_tokens: int = Field(default=2048)
+
+    # OpenAI (fallback)
+    openai_api_key: str = Field(default="")
+    openai_model: str = Field(default="gpt-4-turbo-preview")
+
+    # Embeddings
+    embedding_model: str = Field(default="sentence-transformers/all-MiniLM-L6-v2")
+    embedding_dimension: int = Field(default=384)
+    embedding_batch_size: int = Field(default=32)
+
+    # RAG Configuration
+    rag_initial_k: int = Field(default=100)
+    rag_final_k: int = Field(default=10)
+    rag_min_score: float = Field(default=0.7)
+    rag_search_type: str = Field(default="hybrid")
+    rag_hybrid_alpha: float = Field(default=0.5)
+    rag_max_context_tokens: int = Field(default=4000)
+
+    # Reranking
+    rerank_model: str = Field(default="cross-encoder/ms-marco-MiniLM-L-12-v2")
+    use_reranking: bool = Field(default=True)
+
+    # Caching
+    enable_semantic_cache: bool = Field(default=True)
+    cache_embedding_ttl: int = Field(default=86400)
+    cache_retrieval_ttl: int = Field(default=3600)
+    cache_generation_ttl: int = Field(default=1800)
+
+    # Circuit Breaker
+    circuit_breaker_failure_threshold: int = Field(default=5)
+    circuit_breaker_recovery_timeout: int = Field(default=60)
+
+    # Retry Policy
+    retry_max_attempts: int = Field(default=3)
+    retry_wait_exponential_multiplier: int = Field(default=1)
+    retry_wait_exponential_max: int = Field(default=10)
+
+    # Rate Limiting
+    rate_limit_enabled: bool = Field(default=True)
+    rate_limit_per_minute: int = Field(default=60)
+    rate_limit_per_hour: int = Field(default=1000)
+
+    # Monitoring
+    prometheus_port: int = Field(default=9090)
+    enable_tracing: bool = Field(default=True)
+    jaeger_agent_host: str = Field(default="localhost")
+    jaeger_agent_port: int = Field(default=6831)
+    trace_sample_rate: float = Field(default=0.1)
+
+    # CORS
+    cors_origins: List[str] = Field(
+        default=["http://localhost:3000", "http://localhost:8000"]
+    )
+    cors_allow_credentials: bool = Field(default=True)
+
+    # Security
+    secret_key: str = Field(default="your-secret-key-change-in-production")
+    algorithm: str = Field(default="HS256")
+    access_token_expire_minutes: int = Field(default=30)
+
+
+@lru_cache
+def get_settings() -> Settings:
+    """Get cached settings instance"""
+    return Settings()
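Settings resolve from environment variables (or `.env`), and `get_settings()` is memoized with `lru_cache`, so overrides must be in place before the first call. A minimal sketch:

```python
import os

# Env vars override the defaults above (field names are case-insensitive);
# set them before the first, cached, get_settings() call.
os.environ["GEMINI_MODEL"] = "gemini-2.0-flash"
os.environ["RAG_FINAL_K"] = "5"

from app.core.config import get_settings

settings = get_settings()
print(settings.gemini_model, settings.rag_final_k)
```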
app/core/logging.py
ADDED
@@ -0,0 +1,75 @@
+"""
+Core - Structured Logging
+
+JSON structured logging with correlation IDs.
+"""
+import logging
+import sys
+from contextvars import ContextVar
+from datetime import datetime
+from typing import Any, Dict
+
+import structlog
+
+# Correlation ID context variable
+correlation_id_var: ContextVar[str] = ContextVar("correlation_id", default="")
+
+
+def get_correlation_id() -> str:
+    """Get current correlation ID"""
+    return correlation_id_var.get()
+
+
+def set_correlation_id(correlation_id: str) -> None:
+    """Set correlation ID for current context"""
+    correlation_id_var.set(correlation_id)
+
+
+def add_correlation_id(logger: Any, method_name: str, event_dict: Dict) -> Dict:
+    """Add correlation ID to log event"""
+    event_dict["correlation_id"] = get_correlation_id()
+    return event_dict
+
+
+def add_service_info(logger: Any, method_name: str, event_dict: Dict) -> Dict:
+    """Add service information to log event"""
+    event_dict["service"] = "rag-onboarding-backend"
+    event_dict["version"] = "1.0.0"
+    return event_dict
+
+
+def setup_logging(log_level: str = "INFO") -> None:
+    """Setup structured logging"""
+
+    # Configure structlog
+    structlog.configure(
+        processors=[
+            structlog.contextvars.merge_contextvars,
+            structlog.stdlib.filter_by_level,
+            structlog.processors.TimeStamper(fmt="iso"),
+            structlog.stdlib.add_logger_name,
+            structlog.stdlib.add_log_level,
+            structlog.processors.StackInfoRenderer(),
+            add_correlation_id,
+            add_service_info,
+            structlog.processors.format_exc_info,
+            structlog.processors.UnicodeDecoder(),
+            structlog.processors.JSONRenderer(),
+        ],
+        wrapper_class=structlog.stdlib.BoundLogger,
+        context_class=dict,
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        cache_logger_on_first_use=True,
+    )
+
+    # Configure standard logging
+    logging.basicConfig(
+        format="%(message)s",
+        stream=sys.stdout,
+        level=getattr(logging, log_level.upper()),
+    )
+
+
+def get_logger(name: str) -> structlog.stdlib.BoundLogger:
+    """Get logger instance"""
+    return structlog.get_logger(name)
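Typical usage of these helpers: configure once at startup, bind a correlation ID per request, then emit key-value events. A minimal sketch:

```python
from uuid import uuid4

from app.core.logging import get_logger, set_correlation_id, setup_logging

setup_logging("INFO")
set_correlation_id(str(uuid4()))  # done per request by middleware in practice

log = get_logger(__name__)
log.info("query_received", department="HR", top_k=5)
# -> one JSON line including correlation_id, service, version, and timestamp
```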
app/core/metrics.py
ADDED
@@ -0,0 +1,98 @@
+"""
+Core - Prometheus Metrics
+
+Application metrics for monitoring.
+"""
+from prometheus_client import Counter, Gauge, Histogram
+
+# Request metrics
+http_requests_total = Counter(
+    "http_requests_total",
+    "Total HTTP requests",
+    ["method", "endpoint", "status"],
+)
+
+http_request_duration_seconds = Histogram(
+    "http_request_duration_seconds",
+    "HTTP request duration in seconds",
+    ["method", "endpoint"],
+    buckets=[0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0],
+)
+
+# RAG Pipeline metrics
+rag_retrieval_duration_seconds = Histogram(
+    "rag_retrieval_duration_seconds",
+    "RAG retrieval phase duration",
+    ["strategy"],
+    buckets=[0.01, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0],
+)
+
+rag_reranking_duration_seconds = Histogram(
+    "rag_reranking_duration_seconds",
+    "RAG reranking phase duration",
+    buckets=[0.01, 0.05, 0.1, 0.2, 0.5, 1.0],
+)
+
+llm_generation_duration_seconds = Histogram(
+    "llm_generation_duration_seconds",
+    "LLM generation duration",
+    ["model"],
+    buckets=[0.5, 1.0, 2.0, 5.0, 10.0, 20.0],
+)
+
+llm_tokens_used_total = Counter(
+    "llm_tokens_used_total",
+    "Total LLM tokens used",
+    ["model", "type"],  # "type" label: prompt or completion
+)
+
+# Cache metrics
+cache_hits_total = Counter(
+    "cache_hits_total",
+    "Total cache hits",
+    ["cache_type"],  # embedding, retrieval, generation
+)
+
+cache_misses_total = Counter(
+    "cache_misses_total",
+    "Total cache misses",
+    ["cache_type"],
+)
+
+# Business metrics
+queries_total = Counter(
+    "queries_total",
+    "Total queries processed",
+    ["department", "status"],
+)
+
+documents_indexed_total = Counter(
+    "documents_indexed_total",
+    "Total documents indexed",
+    ["department", "file_type"],
+)
+
+query_confidence_score = Histogram(
+    "query_confidence_score",
+    "Query confidence scores",
+    ["department"],
+    buckets=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
+)
+
+# System metrics
+active_requests = Gauge(
+    "active_requests",
+    "Number of active requests",
+)
+
+database_connections_active = Gauge(
+    "database_connections_active",
+    "Active database connections",
+)
+
+# Error metrics
+errors_total = Counter(
+    "errors_total",
+    "Total errors",
+    ["error_type", "component"],
+)
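A sketch of how these collectors are exercised in a handler, using `prometheus_client`'s `labels()`, `inc()`, and the `Histogram.time()` context manager (the label values shown are illustrative):

```python
from app.core.metrics import (
    http_request_duration_seconds,
    http_requests_total,
    queries_total,
)


def handle_query(department: str) -> None:
    # Histogram.time() observes the elapsed seconds on exit
    with http_request_duration_seconds.labels(method="POST", endpoint="/api/v1/query").time():
        ...  # process the request
    http_requests_total.labels(method="POST", endpoint="/api/v1/query", status="200").inc()
    queries_total.labels(department=department, status="completed").inc()
```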
app/domain/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Domain layer"""
app/domain/entities/__init__.py
ADDED
@@ -0,0 +1,15 @@
+"""Domain entities"""
+
+from app.domain.entities.document import Document, DocumentChunk, DocumentStatus, DocumentType
+from app.domain.entities.query import Query, QueryRequest, QueryStatus, Source
+
+__all__ = [
+    "Document",
+    "DocumentChunk",
+    "DocumentStatus",
+    "DocumentType",
+    "Query",
+    "QueryRequest",
+    "QueryStatus",
+    "Source",
+]
app/domain/entities/document.py
ADDED
@@ -0,0 +1,87 @@
+"""
+Domain Layer - Document Entity
+
+Represents a document in the knowledge base.
+"""
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Optional
+from uuid import UUID, uuid4
+
+
+class DocumentStatus(str, Enum):
+    """Document processing status"""
+
+    PENDING = "pending"
+    PROCESSING = "processing"
+    INDEXED = "indexed"
+    FAILED = "failed"
+
+
+class DocumentType(str, Enum):
+    """Supported document types"""
+
+    PDF = "pdf"
+    DOCX = "docx"
+    TXT = "txt"
+    MD = "md"
+    HTML = "html"
+
+
+@dataclass
+class Document:
+    """Document entity - core business object"""
+
+    title: str
+    filename: str
+    file_type: DocumentType
+    file_size: int
+    storage_path: str
+    department: str
+    id: UUID = field(default_factory=uuid4)
+    status: DocumentStatus = DocumentStatus.PENDING
+    upload_session_id: Optional[str] = None
+    uploaded_at: datetime = field(default_factory=datetime.utcnow)
+    indexed_at: Optional[datetime] = None
+    metadata: dict = field(default_factory=dict)
+    created_at: datetime = field(default_factory=datetime.utcnow)
+    updated_at: datetime = field(default_factory=datetime.utcnow)
+
+    def mark_as_processing(self) -> None:
+        """Mark document as being processed"""
+        self.status = DocumentStatus.PROCESSING
+        self.updated_at = datetime.utcnow()
+
+    def mark_as_indexed(self) -> None:
+        """Mark document as successfully indexed"""
+        self.status = DocumentStatus.INDEXED
+        self.indexed_at = datetime.utcnow()
+        self.updated_at = datetime.utcnow()
+
+    def mark_as_failed(self) -> None:
+        """Mark document processing as failed"""
+        self.status = DocumentStatus.FAILED
+        self.updated_at = datetime.utcnow()
+
+    def is_indexed(self) -> bool:
+        """Check if document is indexed"""
+        return self.status == DocumentStatus.INDEXED
+
+
+@dataclass
+class DocumentChunk:
+    """Document chunk - piece of document for vector search"""
+
+    document_id: UUID
+    chunk_index: int
+    content: str
+    token_count: int
+    id: UUID = field(default_factory=uuid4)
+    vector_id: Optional[str] = None
+    metadata: dict = field(default_factory=dict)
+    created_at: datetime = field(default_factory=datetime.utcnow)
+
+    def set_vector_id(self, vector_id: str) -> None:
+        """Set Qdrant vector ID"""
+        self.vector_id = vector_id
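The `mark_as_*` methods give `Document` a small status state machine; a sketch of the happy path:

```python
from app.domain.entities.document import Document, DocumentType

doc = Document(
    title="Employee Handbook",
    filename="employee_handbook.pdf",
    file_type=DocumentType.PDF,
    file_size=1024,
    storage_path="documents/ab/abc123/employee_handbook.pdf",
    department="HR",
)
assert not doc.is_indexed()  # starts as PENDING
doc.mark_as_processing()
doc.mark_as_indexed()
assert doc.is_indexed() and doc.indexed_at is not None
```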
app/domain/entities/query.py
ADDED

"""
Domain Layer - Query Entity

Represents a user query and its result.
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import List, Optional
from uuid import UUID, uuid4


class QueryStatus(str, Enum):
    """Query processing status"""

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"


@dataclass
class Source:
    """Retrieved source/citation for a query answer"""

    title: str
    content: str
    relevance_score: float
    document_id: UUID
    chunk_index: int
    metadata: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Convert to dictionary"""
        return {
            "title": self.title,
            "content": self.content,
            "relevance_score": self.relevance_score,
            "document_id": str(self.document_id),
            "chunk_index": self.chunk_index,
            "metadata": self.metadata,
        }


@dataclass
class Query:
    """Query entity - represents user question"""

    query_text: str
    department: str
    user_id: Optional[str] = None
    session_id: Optional[str] = None
    id: UUID = field(default_factory=uuid4)
    status: QueryStatus = QueryStatus.PENDING
    answer: Optional[str] = None
    sources: List[Source] = field(default_factory=list)
    confidence: float = 0.0
    duration_ms: int = 0
    tokens_used: int = 0
    model: Optional[str] = None
    created_at: datetime = field(default_factory=datetime.utcnow)
    completed_at: Optional[datetime] = None

    def mark_as_processing(self) -> None:
        """Mark query as being processed"""
        self.status = QueryStatus.PROCESSING

    def mark_as_completed(
        self,
        answer: str,
        sources: List[Source],
        confidence: float,
        duration_ms: int,
        tokens_used: int,
        model: str,
    ) -> None:
        """Mark query as completed with results"""
        self.status = QueryStatus.COMPLETED
        self.answer = answer
        self.sources = sources
        self.confidence = confidence
        self.duration_ms = duration_ms
        self.tokens_used = tokens_used
        self.model = model
        self.completed_at = datetime.utcnow()

    def mark_as_failed(self) -> None:
        """Mark query as failed"""
        self.status = QueryStatus.FAILED
        self.completed_at = datetime.utcnow()


@dataclass
class QueryRequest:
    """Query request from user - value object"""

    query_text: str
    department: str
    user_id: Optional[str] = None
    session_id: Optional[str] = None
    top_k: int = 10
    temperature: float = 0.7
    max_tokens: int = 2048
    filters: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate query request"""
        if not self.query_text or len(self.query_text.strip()) == 0:
            raise ValueError("query_text cannot be empty")
        if self.top_k < 1 or self.top_k > 50:
            raise ValueError("top_k must be between 1 and 50")
        if self.temperature < 0 or self.temperature > 1:
            raise ValueError("temperature must be between 0 and 1")

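Because QueryRequest validates itself in __post_init__, out-of-range values fail at construction time. A small sketch (values are illustrative):

from app.domain.entities.query import QueryRequest

req = QueryRequest(query_text="How do I enroll in benefits?", department="hr")

# Invalid values are rejected immediately:
try:
    QueryRequest(query_text="hi", department="hr", top_k=100)
except ValueError as exc:
    print(exc)  # "top_k must be between 1 and 50"
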
app/domain/interfaces/__init__.py
ADDED

"""Domain interfaces"""

from app.domain.interfaces.cache import ICache
from app.domain.interfaces.llm import ILLM, IPromptBuilder, LLMMessage, LLMResponse
from app.domain.interfaces.repository import IChunkRepository, IDocumentRepository
from app.domain.interfaces.retriever import IEmbedder, IReranker, IRetriever, RetrievalResult

__all__ = [
    "ICache",
    "IChunkRepository",
    "IDocumentRepository",
    "IEmbedder",
    "ILLM",
    "IPromptBuilder",
    "IReranker",
    "IRetriever",
    "LLMMessage",
    "LLMResponse",
    "RetrievalResult",
]

app/domain/interfaces/cache.py
ADDED

"""
Domain Layer - Cache Interface

Defines contract for caching implementations.
"""
from abc import ABC, abstractmethod
from typing import Any, Optional


class ICache(ABC):
    """Interface for cache implementations"""

    @abstractmethod
    async def get(self, key: str) -> Optional[Any]:
        """Get value from cache"""
        pass

    @abstractmethod
    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool:
        """Set value in cache"""
        pass

    @abstractmethod
    async def delete(self, key: str) -> bool:
        """Delete key from cache"""
        pass

    @abstractmethod
    async def exists(self, key: str) -> bool:
        """Check if key exists"""
        pass

    @abstractmethod
    async def clear(self, pattern: str = "*") -> int:
        """Clear cache by pattern"""
        pass

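The contract is small enough that an in-memory stand-in is handy for unit tests. A minimal sketch of one possible implementation (not part of this repo; TTL is accepted but not enforced):

import fnmatch
from typing import Any, Optional

from app.domain.interfaces.cache import ICache


class InMemoryCache(ICache):
    """Dict-backed ICache for tests; TTL is accepted but ignored."""

    def __init__(self) -> None:
        self._store: dict = {}

    async def get(self, key: str) -> Optional[Any]:
        return self._store.get(key)

    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool:
        self._store[key] = value
        return True

    async def delete(self, key: str) -> bool:
        return self._store.pop(key, None) is not None

    async def exists(self, key: str) -> bool:
        return key in self._store

    async def clear(self, pattern: str = "*") -> int:
        # Glob-style matching approximates Redis SCAN MATCH semantics
        matches = [k for k in self._store if fnmatch.fnmatch(k, pattern)]
        for k in matches:
            del self._store[k]
        return len(matches)
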
app/domain/interfaces/llm.py
ADDED

"""
Domain Layer - LLM Interface

Defines contract for LLM implementations.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import AsyncIterator, List, Optional


@dataclass
class LLMMessage:
    """Chat message"""

    role: str  # system, user, assistant
    content: str


@dataclass
class LLMResponse:
    """LLM generation response"""

    content: str
    model: str
    tokens_used: int
    finish_reason: str


class ILLM(ABC):
    """Interface for LLM implementations"""

    @abstractmethod
    async def generate(
        self,
        messages: List[LLMMessage],
        temperature: float = 0.7,
        max_tokens: int = 2048,
        stream: bool = False,
    ) -> LLMResponse:
        """Generate response from LLM"""
        pass

    @abstractmethod
    async def generate_stream(
        self,
        messages: List[LLMMessage],
        temperature: float = 0.7,
        max_tokens: int = 2048,
    ) -> AsyncIterator[str]:
        """Generate streaming response from LLM"""
        pass

    @abstractmethod
    def get_model_name(self) -> str:
        """Get model name"""
        pass


class IPromptBuilder(ABC):
    """Interface for prompt building"""

    @abstractmethod
    def build_rag_prompt(
        self, query: str, context: List[str], system_prompt: Optional[str] = None
    ) -> List[LLMMessage]:
        """Build RAG prompt with query and context"""
        pass

    @abstractmethod
    def build_query_expansion_prompt(self, query: str) -> List[LLMMessage]:
        """Build prompt for query expansion"""
        pass

app/domain/interfaces/repository.py
ADDED

"""
Domain Layer - Interfaces (Ports)

Defines contracts for infrastructure implementations.
"""
from abc import ABC, abstractmethod
from typing import List, Optional
from uuid import UUID

from app.domain.entities import Document, DocumentChunk


class IDocumentRepository(ABC):
    """Repository interface for document persistence"""

    @abstractmethod
    async def create(self, document: Document) -> Document:
        """Create new document"""
        pass

    @abstractmethod
    async def get_by_id(self, document_id: UUID) -> Optional[Document]:
        """Get document by ID"""
        pass

    @abstractmethod
    async def update(self, document: Document) -> Document:
        """Update document"""
        pass

    @abstractmethod
    async def list_by_department(
        self, department: str, skip: int = 0, limit: int = 100
    ) -> List[Document]:
        """List documents by department"""
        pass

    @abstractmethod
    async def delete(self, document_id: UUID) -> bool:
        """Delete document"""
        pass


class IChunkRepository(ABC):
    """Repository interface for document chunks"""

    @abstractmethod
    async def create_bulk(self, chunks: List[DocumentChunk]) -> List[DocumentChunk]:
        """Create multiple chunks"""
        pass

    @abstractmethod
    async def get_by_document_id(self, document_id: UUID) -> List[DocumentChunk]:
        """Get all chunks for a document"""
        pass

    @abstractmethod
    async def delete_by_document_id(self, document_id: UUID) -> int:
        """Delete all chunks for a document"""
        pass

app/domain/interfaces/retriever.py
ADDED

"""
Domain Layer - Retriever Interface

Defines contract for document retrieval implementations.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class RetrievalResult:
    """Single retrieval result"""

    content: str
    score: float
    document_id: str
    chunk_index: int
    metadata: dict


class IRetriever(ABC):
    """Interface for document retrieval"""

    @abstractmethod
    async def search(
        self,
        query: str,
        top_k: int = 10,
        filters: Optional[dict] = None,
        min_score: float = 0.0,
    ) -> List[RetrievalResult]:
        """Search for relevant documents"""
        pass

    @abstractmethod
    async def hybrid_search(
        self,
        query: str,
        top_k: int = 10,
        alpha: float = 0.5,
        filters: Optional[dict] = None,
    ) -> List[RetrievalResult]:
        """Hybrid search (semantic + keyword)"""
        pass


class IReranker(ABC):
    """Interface for result reranking"""

    @abstractmethod
    async def rerank(
        self, query: str, results: List[RetrievalResult], top_k: int = 10
    ) -> List[RetrievalResult]:
        """Rerank retrieval results"""
        pass


class IEmbedder(ABC):
    """Interface for text embedding"""

    @abstractmethod
    async def embed_text(self, text: str) -> List[float]:
        """Generate embedding for single text"""
        pass

    @abstractmethod
    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for multiple texts"""
        pass

    @abstractmethod
    def get_dimension(self) -> int:
        """Get embedding dimension"""
        pass

app/infrastructure/__init__.py
ADDED

"""Infrastructure layer"""

app/infrastructure/cache/__init__.py
ADDED

"""Cache implementations"""

app/infrastructure/cache/redis_cache.py
ADDED

"""
Infrastructure - Redis Cache Implementation
"""
import json
from typing import Any, Optional

import redis.asyncio as redis

from app.domain.interfaces import ICache


class RedisCache(ICache):
    """Redis cache implementation"""

    def __init__(self, redis_url: str):
        self.redis_url = redis_url
        self._client: Optional[redis.Redis] = None

    async def _get_client(self) -> redis.Redis:
        """Get or create Redis client"""
        if self._client is None:
            # redis.asyncio.from_url returns a client synchronously;
            # awaiting it would raise a TypeError.
            self._client = redis.from_url(
                self.redis_url, encoding="utf-8", decode_responses=True
            )
        return self._client

    async def get(self, key: str) -> Optional[Any]:
        """Get value from cache"""
        client = await self._get_client()
        value = await client.get(key)

        if value is None:
            return None

        try:
            return json.loads(value)
        except json.JSONDecodeError:
            return value

    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool:
        """Set value in cache"""
        client = await self._get_client()

        # Serialize value
        if isinstance(value, (dict, list)):
            serialized = json.dumps(value)
        else:
            serialized = str(value)

        if ttl:
            await client.setex(key, ttl, serialized)
        else:
            await client.set(key, serialized)

        return True

    async def delete(self, key: str) -> bool:
        """Delete key from cache"""
        client = await self._get_client()
        result = await client.delete(key)
        return result > 0

    async def exists(self, key: str) -> bool:
        """Check if key exists"""
        client = await self._get_client()
        return await client.exists(key) > 0

    async def clear(self, pattern: str = "*") -> int:
        """Clear cache by pattern"""
        client = await self._get_client()
        keys = []

        async for key in client.scan_iter(match=pattern):
            keys.append(key)

        if keys:
            return await client.delete(*keys)

        return 0

    async def close(self) -> None:
        """Close Redis connection"""
        if self._client:
            await self._client.close()

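A usage sketch, assuming a Redis instance reachable at the default local URL (dict and list values round-trip through JSON; everything else is stringified):

import asyncio

from app.infrastructure.cache.redis_cache import RedisCache

async def demo() -> None:
    cache = RedisCache("redis://localhost:6379/0")  # assumed local Redis URL
    await cache.set("greeting", {"msg": "hello"}, ttl=60)  # JSON-serialized, 60s TTL
    print(await cache.get("greeting"))  # {'msg': 'hello'}
    await cache.close()

asyncio.run(demo())
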
app/infrastructure/database/__init__.py
ADDED

"""Database infrastructure"""

app/infrastructure/database/models.py
ADDED

"""
Infrastructure - Database Models (SQLAlchemy)
"""
import uuid
from datetime import datetime

from sqlalchemy import Column, DateTime, Integer, String, Text, BigInteger, Index
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import declarative_base  # 2.0 location; ext.declarative is deprecated

Base = declarative_base()


class DocumentModel(Base):
    """Document table model"""

    __tablename__ = "documents"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    title = Column(String(500), nullable=False, index=True)
    filename = Column(String(255), nullable=False)
    file_type = Column(String(50), nullable=False, index=True)
    file_size = Column(BigInteger, nullable=False)
    storage_path = Column(String(1000), nullable=False)
    department = Column(String(100), nullable=False, index=True)
    status = Column(String(50), nullable=False, default="pending", index=True)
    upload_session_id = Column(String(100), nullable=True)
    uploaded_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False)
    indexed_at = Column(DateTime(timezone=True), nullable=True)
    # "metadata" is a reserved attribute name on declarative models, so the
    # Python attribute is doc_metadata while the column keeps the name "metadata".
    doc_metadata = Column("metadata", JSONB, default=dict, nullable=False)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False
    )

    __table_args__ = (
        Index("ix_documents_department_status", "department", "status"),
        Index("ix_documents_created_at", "created_at"),
    )


class DocumentChunkModel(Base):
    """Document chunk table model"""

    __tablename__ = "document_chunks"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    document_id = Column(UUID(as_uuid=True), nullable=False, index=True)
    chunk_index = Column(Integer, nullable=False)
    content = Column(Text, nullable=False)
    token_count = Column(Integer, nullable=False)
    vector_id = Column(String(100), nullable=True, index=True)
    # Same reserved-name workaround as DocumentModel.doc_metadata above.
    chunk_metadata = Column("metadata", JSONB, default=dict, nullable=False)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False)

    __table_args__ = (Index("ix_chunks_document_id_index", "document_id", "chunk_index"),)


class QueryModel(Base):
    """Query table model"""

    __tablename__ = "queries"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    query_text = Column(Text, nullable=False)
    department = Column(String(100), nullable=False, index=True)
    user_id = Column(String(100), nullable=True, index=True)
    session_id = Column(String(100), nullable=True, index=True)
    status = Column(String(50), nullable=False, default="pending", index=True)
    answer = Column(Text, nullable=True)
    sources = Column(JSONB, default=list, nullable=False)
    confidence = Column(Integer, default=0, nullable=False)  # Store as int (0-100)
    duration_ms = Column(Integer, default=0, nullable=False)
    tokens_used = Column(Integer, default=0, nullable=False)
    model = Column(String(100), nullable=True)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False, index=True)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    __table_args__ = (
        Index("ix_queries_department_created", "department", "created_at"),
        Index("ix_queries_user_created", "user_id", "created_at"),
    )

app/infrastructure/external/__init__.py
ADDED

"""External services"""

app/infrastructure/external/embedder.py
ADDED

"""
Infrastructure - Sentence Transformers Embedding Service
"""
from typing import List

from sentence_transformers import SentenceTransformer

from app.domain.interfaces import IEmbedder


class SentenceTransformerEmbedder(IEmbedder):
    """Sentence Transformers embedding implementation"""

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)
        self.dimension = self.model.get_sentence_embedding_dimension()

    async def embed_text(self, text: str) -> List[float]:
        """Generate embedding for single text"""
        embedding = self.model.encode(text, convert_to_numpy=True)
        return embedding.tolist()

    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for multiple texts"""
        embeddings = self.model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
        return embeddings.tolist()

    def get_dimension(self) -> int:
        """Get embedding dimension"""
        return self.dimension

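A usage sketch; the default all-MiniLM-L6-v2 model (downloaded on first use) produces 384-dimensional vectors, matching the Qdrant vector_size default used below:

import asyncio

from app.infrastructure.external.embedder import SentenceTransformerEmbedder

async def demo() -> None:
    embedder = SentenceTransformerEmbedder()  # downloads all-MiniLM-L6-v2 on first use
    vec = await embedder.embed_text("What is the vacation policy?")
    print(len(vec), embedder.get_dimension())  # 384 384

asyncio.run(demo())
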
app/infrastructure/external/gemini_llm.py
ADDED

"""
Infrastructure - Gemini LLM Service
"""
from typing import AsyncIterator, List

import google.generativeai as genai

from app.domain.interfaces import ILLM, LLMMessage, LLMResponse


class GeminiLLM(ILLM):
    """Gemini LLM implementation"""

    def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash"):
        genai.configure(api_key=api_key)
        self.model_name = model_name
        self.model = genai.GenerativeModel(model_name)

    async def generate(
        self,
        messages: List[LLMMessage],
        temperature: float = 0.7,
        max_tokens: int = 2048,
        stream: bool = False,
    ) -> LLMResponse:
        """Generate response from Gemini"""

        # Convert messages to Gemini format
        prompt = self._build_prompt(messages)

        # Generate
        response = await self.model.generate_content_async(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=temperature, max_output_tokens=max_tokens
            ),
        )

        # Count tokens (approximate)
        tokens_used = len(prompt.split()) + len(response.text.split())

        return LLMResponse(
            content=response.text,
            model=self.model_name,
            tokens_used=tokens_used,
            finish_reason="stop",
        )

    async def generate_stream(
        self,
        messages: List[LLMMessage],
        temperature: float = 0.7,
        max_tokens: int = 2048,
    ) -> AsyncIterator[str]:
        """Generate streaming response from Gemini"""

        prompt = self._build_prompt(messages)

        response = await self.model.generate_content_async(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=temperature, max_output_tokens=max_tokens
            ),
            stream=True,
        )

        async for chunk in response:
            if chunk.text:
                yield chunk.text

    def get_model_name(self) -> str:
        """Get model name"""
        return self.model_name

    def _build_prompt(self, messages: List[LLMMessage]) -> str:
        """Build prompt from messages"""
        parts = []

        for msg in messages:
            if msg.role == "system":
                parts.append(f"System: {msg.content}")
            elif msg.role == "user":
                parts.append(f"User: {msg.content}")
            elif msg.role == "assistant":
                parts.append(f"Assistant: {msg.content}")

        return "\n\n".join(parts)

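A streaming usage sketch, assuming a valid Gemini API key (the "YOUR_API_KEY" placeholder must be replaced) and that the pinned google-generativeai version supports async streaming as used above:

import asyncio

from app.domain.interfaces import LLMMessage
from app.infrastructure.external.gemini_llm import GeminiLLM

async def demo() -> None:
    llm = GeminiLLM(api_key="YOUR_API_KEY")  # placeholder; supply a real key
    messages = [LLMMessage(role="user", content="Say hello in one sentence.")]
    async for piece in llm.generate_stream(messages, max_tokens=64):
        print(piece, end="", flush=True)

asyncio.run(demo())
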
app/infrastructure/external/prompt_builder.py
ADDED

"""
Infrastructure - Prompt Builder Implementation
"""
from typing import List, Optional

from app.domain.interfaces import IPromptBuilder, LLMMessage


class DefaultPromptBuilder(IPromptBuilder):
    """Default prompt builder implementation"""

    def build_rag_prompt(
        self, query: str, context: List[str], system_prompt: Optional[str] = None
    ) -> List[LLMMessage]:
        """Build RAG prompt with query and context"""

        if system_prompt is None:
            system_prompt = """You are a helpful corporate onboarding assistant.
Answer questions based ONLY on the provided context.
If the answer is not in the context, say "I don't have enough information to answer that question."
Always cite your sources by referencing the relevant context sections."""

        # Build context string
        context_str = "\n\n---\n\n".join(
            [f"[Context {i+1}]\n{ctx}" for i, ctx in enumerate(context)]
        )

        user_message = f"""Context:
{context_str}

Question: {query}

Please provide a clear, accurate answer based on the context above. Include citations."""

        return [
            LLMMessage(role="system", content=system_prompt),
            LLMMessage(role="user", content=user_message),
        ]

    def build_query_expansion_prompt(self, query: str) -> List[LLMMessage]:
        """Build prompt for query expansion"""

        system_prompt = """You are a query expansion expert.
Generate 2-3 alternative phrasings of the user's question to improve retrieval.
Return only the alternative questions, one per line."""

        user_message = f"""Original question: {query}

Generate alternative phrasings:"""

        return [
            LLMMessage(role="system", content=system_prompt),
            LLMMessage(role="user", content=user_message),
        ]

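A quick sketch of the message list the builder produces; the context chunk is illustrative:

from app.infrastructure.external.prompt_builder import DefaultPromptBuilder

builder = DefaultPromptBuilder()
messages = builder.build_rag_prompt(
    query="How many vacation days do new hires get?",
    context=["New hires accrue 15 vacation days per year."],  # illustrative chunk
)
for msg in messages:
    print(msg.role, "->", msg.content[:60])  # system prompt, then user message with context
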
app/infrastructure/external/qdrant_retriever.py
ADDED

"""
Infrastructure - Qdrant Vector Store
"""
from typing import Dict, List, Optional

from qdrant_client import AsyncQdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams, Filter, FieldCondition, MatchValue

from app.domain.interfaces import IRetriever, RetrievalResult


class QdrantRetriever(IRetriever):
    """Qdrant vector store implementation"""

    def __init__(
        self,
        url: str,
        collection_name: str,
        vector_size: int = 384,
        api_key: Optional[str] = None,
    ):
        self.url = url
        self.collection_name = collection_name
        self.vector_size = vector_size
        self.client = AsyncQdrantClient(url=url, api_key=api_key)

    async def initialize_collection(self) -> None:
        """Initialize Qdrant collection"""
        collections = await self.client.get_collections()
        collection_names = [c.name for c in collections.collections]

        if self.collection_name not in collection_names:
            await self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(size=self.vector_size, distance=Distance.COSINE),
            )

    async def search(
        self,
        query: str,
        top_k: int = 10,
        filters: Optional[dict] = None,
        min_score: float = 0.0,
    ) -> List[RetrievalResult]:
        """Search for relevant documents"""
        # Note: this requires embedding the query first;
        # in practice, callers embed the query and call search_by_vector.
        raise NotImplementedError("Use search_by_vector instead")

    async def search_by_vector(
        self,
        query_vector: List[float],
        top_k: int = 10,
        filters: Optional[dict] = None,
        min_score: float = 0.0,
    ) -> List[RetrievalResult]:
        """Search by pre-computed vector"""

        # Build filter
        qdrant_filter = None
        if filters:
            conditions = []
            for key, value in filters.items():
                conditions.append(
                    FieldCondition(key=key, match=MatchValue(value=value))
                )
            if conditions:
                qdrant_filter = Filter(must=conditions)

        # Search
        search_result = await self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=top_k,
            query_filter=qdrant_filter,
            score_threshold=min_score,
        )

        # Convert to RetrievalResult
        results = []
        for hit in search_result:
            payload = hit.payload or {}
            results.append(
                RetrievalResult(
                    content=payload.get("content", ""),
                    score=hit.score,
                    document_id=payload.get("document_id", ""),
                    chunk_index=payload.get("chunk_index", 0),
                    metadata=payload.get("metadata", {}),
                )
            )

        return results

    async def hybrid_search(
        self,
        query: str,
        top_k: int = 10,
        alpha: float = 0.5,
        filters: Optional[dict] = None,
    ) -> List[RetrievalResult]:
        """Hybrid search (semantic + keyword)"""
        # Simplified - in production combine with keyword search
        raise NotImplementedError("Hybrid search requires integration with keyword search")

    async def upsert_points(
        self,
        points: List[Dict],
    ) -> None:
        """Upsert points to collection"""
        qdrant_points = [
            PointStruct(
                id=point["id"],
                vector=point["vector"],
                payload=point["payload"],
            )
            for point in points
        ]

        await self.client.upsert(
            collection_name=self.collection_name,
            points=qdrant_points,
        )

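An end-to-end retrieval sketch combining the embedder and retriever above. It assumes a Qdrant instance at the default local URL, an illustrative collection name, and that indexed payloads carry a "department" key for the filter:

import asyncio

from app.infrastructure.external.embedder import SentenceTransformerEmbedder
from app.infrastructure.external.qdrant_retriever import QdrantRetriever

async def demo() -> None:
    embedder = SentenceTransformerEmbedder()
    retriever = QdrantRetriever(
        url="http://localhost:6333",        # assumed local Qdrant
        collection_name="onboarding_docs",  # illustrative name
        vector_size=embedder.get_dimension(),
    )
    await retriever.initialize_collection()
    # Embed the query, then search by the pre-computed vector
    query_vector = await embedder.embed_text("parental leave policy")
    hits = await retriever.search_by_vector(
        query_vector, top_k=5, filters={"department": "hr"}
    )
    for hit in hits:
        print(f"{hit.score:.3f} {hit.content[:60]}")

asyncio.run(demo())
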
app/infrastructure/external/simple_reranker.py
ADDED

"""
Infrastructure - Simple Reranker (Placeholder)

In production, use cross-encoder reranker.
"""
from typing import List

from app.domain.interfaces import IReranker, RetrievalResult


class SimpleReranker(IReranker):
    """Simple reranker - just returns top-k by score"""

    async def rerank(
        self, query: str, results: List[RetrievalResult], top_k: int = 10
    ) -> List[RetrievalResult]:
        """Rerank results (simplified - just sort by score)"""
        # Sort by score descending
        sorted_results = sorted(results, key=lambda x: x.score, reverse=True)
        return sorted_results[:top_k]

app/infrastructure/repositories/__init__.py
ADDED

"""Repositories"""

app/infrastructure/repositories/postgres_repository.py
ADDED

"""
Infrastructure - PostgreSQL Repository Implementation
"""
from typing import List, Optional
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.domain.entities import Document, DocumentChunk, DocumentStatus, DocumentType
from app.domain.interfaces import IChunkRepository, IDocumentRepository
from app.infrastructure.database.models import DocumentChunkModel, DocumentModel


class PostgresDocumentRepository(IDocumentRepository):
    """PostgreSQL implementation of document repository"""

    def __init__(self, session: AsyncSession):
        self.session = session

    async def create(self, document: Document) -> Document:
        """Create new document"""
        model = DocumentModel(
            id=document.id,
            title=document.title,
            filename=document.filename,
            file_type=document.file_type.value,
            file_size=document.file_size,
            storage_path=document.storage_path,
            department=document.department,
            status=document.status.value,
            upload_session_id=document.upload_session_id,
            uploaded_at=document.uploaded_at,
            indexed_at=document.indexed_at,
            doc_metadata=document.metadata,  # model attribute maps to the "metadata" column
        )
        self.session.add(model)
        await self.session.commit()
        await self.session.refresh(model)
        return self._to_entity(model)

    async def get_by_id(self, document_id: UUID) -> Optional[Document]:
        """Get document by ID"""
        result = await self.session.execute(
            select(DocumentModel).where(DocumentModel.id == document_id)
        )
        model = result.scalar_one_or_none()
        return self._to_entity(model) if model else None

    async def update(self, document: Document) -> Document:
        """Update document"""
        result = await self.session.execute(
            select(DocumentModel).where(DocumentModel.id == document.id)
        )
        model = result.scalar_one_or_none()

        if not model:
            raise ValueError(f"Document {document.id} not found")

        model.title = document.title
        model.status = document.status.value
        model.indexed_at = document.indexed_at
        model.doc_metadata = document.metadata
        model.updated_at = document.updated_at

        await self.session.commit()
        await self.session.refresh(model)
        return self._to_entity(model)

    async def list_by_department(
        self, department: str, skip: int = 0, limit: int = 100
    ) -> List[Document]:
        """List documents by department"""
        result = await self.session.execute(
            select(DocumentModel)
            .where(DocumentModel.department == department)
            .offset(skip)
            .limit(limit)
            .order_by(DocumentModel.created_at.desc())
        )
        models = result.scalars().all()
        return [self._to_entity(model) for model in models]

    async def delete(self, document_id: UUID) -> bool:
        """Delete document"""
        result = await self.session.execute(
            select(DocumentModel).where(DocumentModel.id == document_id)
        )
        model = result.scalar_one_or_none()

        if not model:
            return False

        await self.session.delete(model)
        await self.session.commit()
        return True

    def _to_entity(self, model: DocumentModel) -> Document:
        """Convert model to entity"""
        return Document(
            id=model.id,
            title=model.title,
            filename=model.filename,
            file_type=DocumentType(model.file_type),
            file_size=model.file_size,
            storage_path=model.storage_path,
            department=model.department,
            status=DocumentStatus(model.status),
            upload_session_id=model.upload_session_id,
            uploaded_at=model.uploaded_at,
            indexed_at=model.indexed_at,
            metadata=model.doc_metadata,
            created_at=model.created_at,
            updated_at=model.updated_at,
        )


class PostgresChunkRepository(IChunkRepository):
    """PostgreSQL implementation of chunk repository"""

    def __init__(self, session: AsyncSession):
        self.session = session

    async def create_bulk(self, chunks: List[DocumentChunk]) -> List[DocumentChunk]:
        """Create multiple chunks"""
        models = [
            DocumentChunkModel(
                id=chunk.id,
                document_id=chunk.document_id,
                chunk_index=chunk.chunk_index,
                content=chunk.content,
                token_count=chunk.token_count,
                vector_id=chunk.vector_id,
                chunk_metadata=chunk.metadata,  # model attribute maps to the "metadata" column
            )
            for chunk in chunks
        ]

        self.session.add_all(models)
        await self.session.commit()

        return chunks

    async def get_by_document_id(self, document_id: UUID) -> List[DocumentChunk]:
        """Get all chunks for a document"""
        result = await self.session.execute(
            select(DocumentChunkModel)
            .where(DocumentChunkModel.document_id == document_id)
            .order_by(DocumentChunkModel.chunk_index)
        )
        models = result.scalars().all()
        return [self._to_entity(model) for model in models]

    async def delete_by_document_id(self, document_id: UUID) -> int:
        """Delete all chunks for a document"""
        result = await self.session.execute(
            select(DocumentChunkModel).where(DocumentChunkModel.document_id == document_id)
        )
        models = result.scalars().all()

        for model in models:
            await self.session.delete(model)

        await self.session.commit()
        return len(models)

    def _to_entity(self, model: DocumentChunkModel) -> DocumentChunk:
        """Convert model to entity"""
        return DocumentChunk(
            id=model.id,
            document_id=model.document_id,
            chunk_index=model.chunk_index,
            content=model.content,
            token_count=model.token_count,
            vector_id=model.vector_id,
            metadata=model.chunk_metadata,
            created_at=model.created_at,
        )

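A usage sketch of the document repository with SQLAlchemy's async session machinery; the DSN is a placeholder and assumes the tables already exist (e.g., via the Alembic migration in this repo):

import asyncio

from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

from app.domain.entities import Document, DocumentType
from app.infrastructure.repositories.postgres_repository import PostgresDocumentRepository

async def demo() -> None:
    engine = create_async_engine("postgresql+asyncpg://user:pass@localhost/rag")  # placeholder DSN
    async with async_sessionmaker(engine)() as session:
        repo = PostgresDocumentRepository(session)
        doc = await repo.create(
            Document(
                title="IT Security Policy",      # illustrative values
                filename="security.pdf",
                file_type=DocumentType.PDF,
                file_size=2048,
                storage_path="/data/uploads/security.pdf",
                department="it",
            )
        )
        print(doc.id, doc.status)

asyncio.run(demo())
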
app/main.py
ADDED

"""
FastAPI Application - Main Entry Point
"""
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

from app.core.config import get_settings
from app.core.logging import get_logger, setup_logging
from app.presentation.api.v1.endpoints import router as api_router

settings = get_settings()
setup_logging(settings.log_level)
logger = get_logger(__name__)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager"""
    logger.info("application_startup", version=settings.app_version, env=settings.environment)

    # TODO: Initialize database connection pool
    # TODO: Initialize Qdrant collection
    # TODO: Warm up embedding model

    yield

    # Cleanup
    logger.info("application_shutdown")


app = FastAPI(
    title=settings.app_name,
    version=settings.app_version,
    description="Production-ready RAG backend for corporate employee onboarding",
    lifespan=lifespan,
)

# CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins,
    allow_credentials=settings.cors_allow_credentials,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(api_router)


@app.get("/")
async def root():
    """Root endpoint"""
    return {
        "service": settings.app_name,
        "version": settings.app_version,
        "status": "running",
        "environment": settings.environment,
    }


@app.exception_handler(Exception)
async def global_exception_handler(request, exc):
    """Global exception handler"""
    logger.error("unhandled_exception", error=str(exc), exc_info=True)
    return JSONResponse(
        status_code=500,
        content={"error": "Internal server error", "detail": str(exc)},
    )


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "app.main:app",
        host=settings.host,
        port=settings.port,
        reload=settings.debug,
        log_level=settings.log_level.lower(),
    )

app/presentation/__init__.py
ADDED

"""Presentation layer"""

app/presentation/api/__init__.py
ADDED

"""API layer"""

app/presentation/api/v1/__init__.py
ADDED

"""API v1"""

app/presentation/api/v1/endpoints.py
ADDED

"""
Presentation Layer - API Endpoints
"""
import time
from datetime import datetime
from typing import List
from uuid import uuid4

from fastapi import APIRouter, Depends, File, Form, HTTPException, Response, UploadFile, status
from sqlalchemy.ext.asyncio import AsyncSession

from app.application.dto import DocumentUploadDTO, QueryDTO
from app.application.services import ChunkingService
from app.application.use_cases.document_indexing import DocumentIndexingUseCase
from app.application.use_cases.query_processing import QueryProcessingUseCase
from app.core.config import get_settings
from app.core.logging import get_logger, set_correlation_id
from app.core.metrics import (
    active_requests,
    http_request_duration_seconds,
    http_requests_total,
    queries_total,
)
from app.infrastructure.cache.redis_cache import RedisCache
from app.infrastructure.external.embedder import SentenceTransformerEmbedder
from app.infrastructure.external.gemini_llm import GeminiLLM
from app.infrastructure.external.prompt_builder import DefaultPromptBuilder
from app.infrastructure.external.qdrant_retriever import QdrantRetriever
from app.infrastructure.repositories.postgres_repository import (
    PostgresChunkRepository,
    PostgresDocumentRepository,
)
from app.presentation.api.v1.schemas import (
    DocumentResponse,
    HealthResponse,
    QueryRequest,
    QueryResponse,
    SourceSchema,
)

router = APIRouter(prefix="/api/v1", tags=["api"])
logger = get_logger(__name__)
settings = get_settings()


# Dependency injection (simplified - in production use a proper DI container)
async def get_query_use_case() -> QueryProcessingUseCase:
    """Get query processing use case"""
    # Initialize services
    embedder = SentenceTransformerEmbedder(settings.embedding_model)
    retriever = QdrantRetriever(
        url=settings.qdrant_url,
        collection_name=settings.qdrant_collection_name,
        vector_size=settings.qdrant_vector_size,
        api_key=settings.qdrant_api_key if settings.qdrant_api_key else None,
    )
    llm = GeminiLLM(api_key=settings.gemini_api_key, model_name=settings.gemini_model)
    prompt_builder = DefaultPromptBuilder()
    cache = RedisCache(redis_url=settings.redis_url)

    # For now, using a simple reranker (in production use a cross-encoder)
    from app.infrastructure.external.simple_reranker import SimpleReranker

    reranker = SimpleReranker()

    return QueryProcessingUseCase(
        retriever=retriever,
        reranker=reranker,
        llm=llm,
        prompt_builder=prompt_builder,
        cache=cache,
    )


@router.post("/query", response_model=QueryResponse, status_code=status.HTTP_200_OK)
async def process_query(
    request: QueryRequest,
    use_case: QueryProcessingUseCase = Depends(get_query_use_case),
) -> QueryResponse:
    """Process user query through RAG pipeline"""
    start_time = time.time()
    correlation_id = str(uuid4())
    set_correlation_id(correlation_id)

    active_requests.inc()

    try:
        logger.info("processing_query", query=request.query_text, department=request.department)

        # Convert to DTO
        query_dto = QueryDTO(
            query_text=request.query_text,
            department=request.department,
            user_id=request.user_id,
            session_id=request.session_id,
            top_k=request.top_k,
            temperature=request.temperature,
            max_tokens=request.max_tokens,
            filters=request.filters,
        )

        # Execute use case
        response_dto = await use_case.execute(query_dto)

        # Convert to response schema
        response = QueryResponse(
            query_id=response_dto.query_id,
            answer=response_dto.answer,
            sources=[
                SourceSchema(
                    title=src.title,
                    content=src.content,
                    relevance_score=src.relevance_score,
                    document_id=src.document_id,
                    chunk_index=src.chunk_index,
                    metadata=src.metadata,
                )
                for src in response_dto.sources
            ],
            confidence=response_dto.confidence,
            processing_time_ms=response_dto.processing_time_ms,
            tokens_used=response_dto.tokens_used,
            model=response_dto.model,
        )

        # Metrics
        duration = time.time() - start_time
        http_requests_total.labels(method="POST", endpoint="/api/v1/query", status="200").inc()
        http_request_duration_seconds.labels(method="POST", endpoint="/api/v1/query").observe(
            duration
        )
        queries_total.labels(department=request.department, status="success").inc()

        logger.info("query_processed", query_id=response.query_id, duration_ms=int(duration * 1000))

        return response

    except Exception as e:
        logger.error("query_processing_error", error=str(e), exc_info=True)
        http_requests_total.labels(method="POST", endpoint="/api/v1/query", status="500").inc()
        queries_total.labels(department=request.department, status="error").inc()
        raise HTTPException(status_code=500, detail=f"Query processing failed: {str(e)}")

    finally:
        active_requests.dec()


@router.get("/health", response_model=HealthResponse)
async def health_check() -> HealthResponse:
    """Health check endpoint"""
    return HealthResponse(
        status="healthy",
        version=settings.app_version,
        timestamp=datetime.utcnow(),
        services={
            "database": "unknown",  # TODO: Add actual health checks
            "redis": "unknown",
            "qdrant": "unknown",
        },
    )


@router.get("/metrics")
async def metrics() -> Response:
    """Prometheus metrics endpoint"""
    from prometheus_client import CONTENT_TYPE_LATEST, generate_latest

    # Return the raw exposition bytes; a plain Response keeps FastAPI
    # from JSON-encoding them and sets the Prometheus content type.
    return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)

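A client-side sketch of calling the query endpoint, assuming the service is running locally on the Space port (7860) and that the HealthResponse/QueryResponse fields are as defined in the schemas below:

import httpx

payload = {
    "query_text": "How do I set up my development environment?",  # illustrative query
    "department": "engineering",
}
resp = httpx.post("http://localhost:7860/api/v1/query", json=payload, timeout=60.0)
resp.raise_for_status()
data = resp.json()
print(data["answer"])
for src in data["sources"]:
    print(f"- {src['title']} (score {src['relevance_score']:.2f})")
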
app/presentation/api/v1/schemas.py
ADDED

"""
Presentation Layer - Pydantic Schemas for API
"""
from datetime import datetime
from typing import List, Optional

from pydantic import BaseModel, Field


class QueryRequest(BaseModel):
    """Query request schema"""

    query_text: str = Field(..., min_length=1, max_length=5000, description="User question")
    department: str = Field(..., description="Department context")
    user_id: Optional[str] = Field(None, description="User identifier")
    session_id: Optional[str] = Field(None, description="Session identifier")
    top_k: int = Field(10, ge=1, le=50, description="Number of results to return")
    temperature: float = Field(0.7, ge=0.0, le=1.0, description="LLM temperature")
    max_tokens: int = Field(2048, ge=100, le=4096, description="Max tokens in response")
    filters: dict = Field(default_factory=dict, description="Additional filters")


class SourceSchema(BaseModel):
    """Source citation schema"""

    title: str
    content: str
    relevance_score: float
    document_id: str
    chunk_index: int
    metadata: dict = Field(default_factory=dict)


class QueryResponse(BaseModel):
    """Query response schema"""

    query_id: str
    answer: str
    sources: List[SourceSchema]
    confidence: float
    processing_time_ms: int
    tokens_used: int
    model: str


class DocumentUploadRequest(BaseModel):
    """Document upload request schema"""

    department: str = Field(..., description="Department for the document")
    metadata: dict = Field(default_factory=dict, description="Additional metadata")


class DocumentResponse(BaseModel):
    """Document response schema"""

    id: str
    title: str
    filename: str
    file_type: str
    file_size: int
    department: str
    status: str
    uploaded_at: datetime
    indexed_at: Optional[datetime] = None
    metadata: dict = Field(default_factory=dict)


class HealthResponse(BaseModel):
    """Health check response"""

    status: str
    version: str
    timestamp: datetime
    services: dict = Field(default_factory=dict)


class ErrorResponse(BaseModel):
    """Error response schema"""

    error: str
    detail: Optional[str] = None
    request_id: Optional[str] = None

requirements.txt
ADDED

fastapi==0.109.0
uvicorn[standard]==0.27.0
pydantic==2.5.3
pydantic-settings==2.1.0
sqlalchemy==2.0.25
asyncpg==0.29.0
redis[hiredis]==5.0.1
qdrant-client==1.7.3
sentence-transformers==2.3.1
google-generativeai==0.3.2
alembic==1.13.1
celery[redis]==5.3.6
prometheus-client==0.19.0
structlog==24.1.0
tenacity==8.2.3
httpx==0.26.0
python-multipart==0.0.6
aiofiles==23.2.1
psycopg2-binary==2.9.9