Baktabek commited on
Commit
409c17a
·
verified ·
1 Parent(s): 0766f5c

Upload folder using huggingface_hub

Browse files
Files changed (49) hide show
  1. .gitignore +8 -0
  2. Dockerfile +41 -0
  3. README.md +90 -10
  4. alembic.ini +40 -0
  5. alembic/env.py +66 -0
  6. alembic/script.py.mako +1 -0
  7. alembic/versions/001_initial_schema.py +95 -0
  8. app.py +23 -0
  9. app/__init__.py +5 -0
  10. app/application/__init__.py +1 -0
  11. app/application/dto/__init__.py +79 -0
  12. app/application/services/__init__.py +5 -0
  13. app/application/services/chunking_service.py +97 -0
  14. app/application/use_cases/__init__.py +1 -0
  15. app/application/use_cases/document_indexing.py +129 -0
  16. app/application/use_cases/query_processing.py +136 -0
  17. app/core/__init__.py +1 -0
  18. app/core/config.py +121 -0
  19. app/core/logging.py +75 -0
  20. app/core/metrics.py +98 -0
  21. app/domain/__init__.py +1 -0
  22. app/domain/entities/__init__.py +15 -0
  23. app/domain/entities/document.py +87 -0
  24. app/domain/entities/query.py +113 -0
  25. app/domain/interfaces/__init__.py +20 -0
  26. app/domain/interfaces/cache.py +36 -0
  27. app/domain/interfaces/llm.py +72 -0
  28. app/domain/interfaces/repository.py +60 -0
  29. app/domain/interfaces/retriever.py +75 -0
  30. app/infrastructure/__init__.py +1 -0
  31. app/infrastructure/cache/__init__.py +1 -0
  32. app/infrastructure/cache/redis_cache.py +84 -0
  33. app/infrastructure/database/__init__.py +1 -0
  34. app/infrastructure/database/models.py +82 -0
  35. app/infrastructure/external/__init__.py +1 -0
  36. app/infrastructure/external/embedder.py +31 -0
  37. app/infrastructure/external/gemini_llm.py +87 -0
  38. app/infrastructure/external/prompt_builder.py +54 -0
  39. app/infrastructure/external/qdrant_retriever.py +124 -0
  40. app/infrastructure/external/simple_reranker.py +20 -0
  41. app/infrastructure/repositories/__init__.py +1 -0
  42. app/infrastructure/repositories/postgres_repository.py +178 -0
  43. app/main.py +84 -0
  44. app/presentation/__init__.py +1 -0
  45. app/presentation/api/__init__.py +1 -0
  46. app/presentation/api/v1/__init__.py +1 -0
  47. app/presentation/api/v1/endpoints.py +168 -0
  48. app/presentation/api/v1/schemas.py +82 -0
  49. requirements.txt +19 -0
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Python bytecode and caches
__pycache__/
*.py[cod]
# Local environment files (may contain secrets — never commit)
.env
.env.local
# Log output
*.log
# Test and coverage artifacts
.pytest_cache/
.coverage
htmlcov/
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# HuggingFace Space Dockerfile
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
# (curl is required by the HEALTHCHECK below; build-essential for wheels)
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so dependency install is cached across code changes
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app ./app
COPY app.py .
COPY alembic ./alembic
COPY alembic.ini .

# Create non-root user (uid 1000 is the HF Spaces convention)
RUN useradd -m -u 1000 user && chown -R user:user /app
USER user

# HuggingFace Spaces uses port 7860
EXPOSE 7860

# Set environment
ENV PYTHONUNBUFFERED=1
ENV PORT=7860

# Health check against the FastAPI /health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run application
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,90 @@
1
- ---
2
- title: Rag Onboarding Backend
3
- emoji: 🌍
4
- colorFrom: purple
5
- colorTo: gray
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: RAG Onboarding Backend
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ # RAG Onboarding Backend - HuggingFace Space
12
+
13
+ Production-ready RAG (Retrieval-Augmented Generation) backend for corporate employee onboarding, deployed on HuggingFace Spaces.
14
+
15
+ ## 🌟 Features
16
+
17
+ - **FastAPI REST API** - High-performance async API
18
+ - **HuggingFace Models** - Open-source LLMs and embeddings
19
+ - **Vector Search** - Qdrant for similarity search
20
+ - **Caching** - Redis for performance optimization
21
+ - **Monitoring** - Prometheus metrics
22
+ - **Clean Architecture** - Production-grade code structure
23
+
24
+ ## 🚀 Quick Start
25
+
26
+ ### API Endpoints
27
+
28
+ - `GET /` - Service info
29
+ - `GET /health` - Health check
30
+ - `POST /api/v1/query` - RAG query processing
31
+ - `GET /api/v1/metrics` - Prometheus metrics
32
+ - `GET /docs` - Interactive API documentation
33
+
34
+ ### Example Query
35
+
36
+ ```bash
37
+ curl -X POST "https://YOUR-SPACE-NAME.hf.space/api/v1/query" \
38
+ -H "Content-Type: application/json" \
39
+ -d '{
40
+ "query_text": "What is the onboarding process?",
41
+ "department": "HR",
42
+ "top_k": 5
43
+ }'
44
+ ```
45
+
46
+ ## 🔧 Configuration
47
+
48
+ Set the following secrets in your HuggingFace Space settings:
49
+
50
+ - `GEMINI_API_KEY` - Your Google Gemini API key
51
+ - `DATABASE_URL` - PostgreSQL connection string (use external DB like Supabase/Neon)
52
+ - `REDIS_URL` - Redis connection string (use Upstash Redis)
53
+ - `QDRANT_URL` - Qdrant vector DB URL (use Qdrant Cloud)
54
+ - `QDRANT_API_KEY` - Qdrant API key (if using cloud)
55
+
56
+ ## 📊 Models Used
57
+
58
+ - **LLM**: Google Gemini 2.0 Flash (via API)
59
+ - **Embeddings**: `sentence-transformers/all-MiniLM-L6-v2`
60
+ - **Reranking**: `cross-encoder/ms-marco-MiniLM-L-12-v2` (optional)
61
+
62
+ ## 🏗️ Architecture
63
+
64
+ ```
65
┌─────────────┐
│   Client    │
└──────┬──────┘
       │
       ▼
┌─────────────┐
│   FastAPI   │
└──────┬──────┘
       │
       ├─────► PostgreSQL (Documents)
       ├─────► Redis (Cache)
       ├─────► Qdrant (Vectors)
       └─────► Google Gemini (LLM)
78
+ ```
79
+
80
+ ## 📝 License
81
+
82
+ MIT License - See LICENSE file for details
83
+
84
+ ## 🤝 Contributing
85
+
86
+ Contributions welcome! Please open an issue or submit a PR.
87
+
88
+ ## 📧 Support
89
+
90
+ For questions or issues, please open a GitHub issue.
alembic.ini ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[alembic]
# Location of migration scripts, relative to this file
script_location = alembic
prepend_sys_path = .
version_path_separator = os

# NOTE(review): local-dev default with hard-coded credentials; confirm that
# deployed environments override this (e.g. from DATABASE_URL) before relying
# on it — env.py reads this section directly.
sqlalchemy.url = postgresql+asyncpg://postgres:postgres@localhost:5432/rag_onboarding

# --- Logging configuration (standard logging.fileConfig format) ---

[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
alembic/env.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Alembic environment configuration.

Supports both offline mode (emits SQL without a database connection) and
online mode (runs migrations through an async SQLAlchemy engine built from
the ``[alembic]`` section of alembic.ini).
"""
import asyncio
from logging.config import fileConfig

from alembic import context
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config

from app.infrastructure.database.models import Base

# Alembic Config object (proxies values read from alembic.ini)
config = context.config

# Interpret the config file for Python logging
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Metadata for autogenerate support (compares models against the live schema)
target_metadata = Base.metadata


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode: render SQL using the configured URL."""
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def do_run_migrations(connection: Connection) -> None:
    # Executed inside the sync bridge provided by run_sync() below.
    context.configure(connection=connection, target_metadata=target_metadata)

    with context.begin_transaction():
        context.run_migrations()


async def run_async_migrations() -> None:
    """Run migrations in 'online' mode via an async engine."""
    connectable = async_engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,  # one-shot run: no connection pooling needed
    )

    async with connectable.connect() as connection:
        # Alembic's migration machinery is sync; bridge into it from async.
        await connection.run_sync(do_run_migrations)

    await connectable.dispose()


def run_migrations_online() -> None:
    """Entry point for 'online' mode (drives the async runner)."""
    asyncio.run(run_async_migrations())


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
alembic/script.py.mako ADDED
@@ -0,0 +1 @@
 
 
1
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}
alembic/versions/001_initial_schema.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Database Migration - Initial Schema

Create tables for documents, chunks, and queries.
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers
revision = '001'
down_revision = None
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the documents, document_chunks, and queries tables."""
    # Create documents table
    op.create_table(
        'documents',
        sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column('title', sa.String(500), nullable=False),
        sa.Column('filename', sa.String(255), nullable=False),
        sa.Column('file_type', sa.String(50), nullable=False),
        sa.Column('file_size', sa.BigInteger(), nullable=False),
        sa.Column('storage_path', sa.String(1000), nullable=False),
        sa.Column('department', sa.String(100), nullable=False),
        sa.Column('status', sa.String(50), nullable=False, server_default='pending'),
        sa.Column('upload_session_id', sa.String(100), nullable=True),
        sa.Column('uploaded_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
        sa.Column('indexed_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('metadata', postgresql.JSONB(), nullable=False, server_default='{}'),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
        sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
    )

    # Create indexes for documents (single-column plus the common
    # department+status and time-ordered access paths)
    op.create_index('ix_documents_title', 'documents', ['title'])
    op.create_index('ix_documents_file_type', 'documents', ['file_type'])
    op.create_index('ix_documents_department', 'documents', ['department'])
    op.create_index('ix_documents_status', 'documents', ['status'])
    op.create_index('ix_documents_department_status', 'documents', ['department', 'status'])
    op.create_index('ix_documents_created_at', 'documents', ['created_at'])

    # Create document_chunks table
    # NOTE(review): document_id carries no ForeignKey to documents.id, so
    # referential integrity (and cascade delete) is not enforced by the DB —
    # confirm whether the repository layer guarantees it.
    op.create_table(
        'document_chunks',
        sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column('document_id', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('chunk_index', sa.Integer(), nullable=False),
        sa.Column('content', sa.Text(), nullable=False),
        sa.Column('token_count', sa.Integer(), nullable=False),
        sa.Column('vector_id', sa.String(100), nullable=True),
        sa.Column('metadata', postgresql.JSONB(), nullable=False, server_default='{}'),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
    )

    # Create indexes for chunks
    op.create_index('ix_chunks_document_id', 'document_chunks', ['document_id'])
    op.create_index('ix_chunks_vector_id', 'document_chunks', ['vector_id'])
    op.create_index('ix_chunks_document_id_index', 'document_chunks', ['document_id', 'chunk_index'])

    # Create queries table (audit log of RAG queries and their answers)
    op.create_table(
        'queries',
        sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column('query_text', sa.Text(), nullable=False),
        sa.Column('department', sa.String(100), nullable=False),
        sa.Column('user_id', sa.String(100), nullable=True),
        sa.Column('session_id', sa.String(100), nullable=True),
        sa.Column('status', sa.String(50), nullable=False, server_default='pending'),
        sa.Column('answer', sa.Text(), nullable=True),
        sa.Column('sources', postgresql.JSONB(), nullable=False, server_default='[]'),
        sa.Column('confidence', sa.Integer(), nullable=False, server_default='0'),
        sa.Column('duration_ms', sa.Integer(), nullable=False, server_default='0'),
        sa.Column('tokens_used', sa.Integer(), nullable=False, server_default='0'),
        sa.Column('model', sa.String(100), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
        sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
    )

    # Create indexes for queries
    op.create_index('ix_queries_department', 'queries', ['department'])
    op.create_index('ix_queries_user_id', 'queries', ['user_id'])
    op.create_index('ix_queries_session_id', 'queries', ['session_id'])
    op.create_index('ix_queries_status', 'queries', ['status'])
    op.create_index('ix_queries_created_at', 'queries', ['created_at'])
    op.create_index('ix_queries_department_created', 'queries', ['department', 'created_at'])
    op.create_index('ix_queries_user_created', 'queries', ['user_id', 'created_at'])


def downgrade() -> None:
    """Drop all tables created by upgrade() (dependents first)."""
    op.drop_table('queries')
    op.drop_table('document_chunks')
    op.drop_table('documents')
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
HuggingFace Space entry point.

Exposes the FastAPI application and, when executed directly, serves it
with uvicorn on the port HuggingFace Spaces expects (7860 by default).
"""
import os
import sys

# Ensure the repository root is importable so `app.main` resolves.
sys.path.insert(0, os.path.dirname(__file__))

from app.main import app

if __name__ == "__main__":
    import uvicorn

    # HF Spaces injects PORT; fall back to the Spaces default.
    serve_port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=serve_port, log_level="info")
app/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """
2
+ RAG Onboarding Backend - Production-ready RAG system for employee onboarding
3
+ """
4
+
5
+ __version__ = "1.0.0"
app/application/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Application layer"""
app/application/dto/__init__.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Application Layer - DTOs (Data Transfer Objects)

Plain dataclasses that carry data between the presentation layer and the
application use cases. Optional dict fields are normalized to ``{}`` in
``__post_init__`` so consumers can always treat them as dicts.
"""
from dataclasses import dataclass
from typing import List, Optional
from uuid import UUID


@dataclass
class QueryDTO:
    """Query data transfer object.

    Carries the question text, target department, optional user/session
    identifiers, and generation parameters for the RAG pipeline.
    """

    query_text: str
    department: str
    user_id: Optional[str] = None
    session_id: Optional[str] = None
    top_k: int = 10
    temperature: float = 0.7
    max_tokens: int = 2048
    # Optional retrieval filters; normalized to {} below.
    filters: Optional[dict] = None

    def __post_init__(self) -> None:
        if self.filters is None:
            self.filters = {}


@dataclass
class SourceDTO:
    """Source citation DTO: one retrieved chunk backing an answer."""

    title: str
    content: str
    relevance_score: float
    document_id: str
    chunk_index: int
    metadata: dict


@dataclass
class QueryResponseDTO:
    """Query response DTO: the generated answer plus its provenance/metrics."""

    query_id: str
    answer: str
    sources: List[SourceDTO]
    confidence: float
    processing_time_ms: int
    tokens_used: int
    model: str


@dataclass
class DocumentUploadDTO:
    """Document upload DTO: raw file bytes plus routing metadata."""

    filename: str
    content: bytes
    department: str
    metadata: Optional[dict] = None

    def __post_init__(self) -> None:
        if self.metadata is None:
            self.metadata = {}


@dataclass
class DocumentDTO:
    """Document DTO: the API-facing view of a stored document."""

    id: str
    title: str
    filename: str
    file_type: str
    file_size: int
    department: str
    status: str
    uploaded_at: str  # ISO-8601 timestamp string
    indexed_at: Optional[str] = None
    metadata: Optional[dict] = None

    def __post_init__(self) -> None:
        # FIX: previously DocumentDTO alone skipped this normalization,
        # leaving metadata as None instead of {} like the other DTOs.
        if self.metadata is None:
            self.metadata = {}
app/application/services/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Application services"""
2
+
3
+ from app.application.services.chunking_service import ChunkingService
4
+
5
+ __all__ = ["ChunkingService"]
app/application/services/chunking_service.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Application Layer - Chunking Service
3
+
4
+ Handles intelligent document chunking.
5
+ """
6
+ import re
7
+ from typing import List
8
+ from uuid import UUID
9
+
10
+ from app.domain.entities import DocumentChunk
11
+
12
+
13
class ChunkingService:
    """Service for chunking documents intelligently.

    Splits text on paragraph boundaries and packs paragraphs into chunks of
    roughly ``chunk_size`` tokens, carrying about ``chunk_overlap`` tokens of
    trailing context into each new chunk. Token counts are approximated as
    len(text) // 4.
    """

    def __init__(
        self,
        chunk_size: int = 800,
        chunk_overlap: int = 100,
        min_chunk_size: int = 100,
    ):
        # All sizes are approximate token counts (see _count_tokens).
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.min_chunk_size = min_chunk_size

    async def chunk_text(
        self, text: str, document_id: UUID, metadata: Optional[dict] = None
    ) -> List[DocumentChunk]:
        """Chunk *text* into DocumentChunk entities for *document_id*.

        Chunks shorter than ``min_chunk_size`` tokens are dropped.
        """
        if metadata is None:
            metadata = {}

        # 1. Split by paragraphs
        paragraphs = self._split_paragraphs(text)

        # 2. Combine paragraphs into chunks
        chunks = []
        current_chunk = []
        current_size = 0

        for para in paragraphs:
            para_tokens = self._count_tokens(para)

            if current_size + para_tokens > self.chunk_size and current_chunk:
                # Flush current chunk
                chunk_text = "\n\n".join(current_chunk)
                chunks.append(chunk_text)

                # Start new chunk, seeded with overlap from the previous one
                overlap_text = self._get_overlap(current_chunk)
                current_chunk = [overlap_text, para] if overlap_text else [para]
                current_size = self._count_tokens("\n\n".join(current_chunk))
            else:
                current_chunk.append(para)
                current_size += para_tokens

        # Flush remaining
        if current_chunk:
            chunks.append("\n\n".join(current_chunk))

        # 3. Create DocumentChunk entities
        return [
            DocumentChunk(
                document_id=document_id,
                chunk_index=idx,
                content=chunk,
                token_count=self._count_tokens(chunk),
                # FIX: copy per chunk — previously all chunks shared one
                # metadata dict, so mutating one mutated them all.
                metadata=dict(metadata),
            )
            for idx, chunk in enumerate(chunks)
            if self._count_tokens(chunk) >= self.min_chunk_size
        ]

    def _split_paragraphs(self, text: str) -> List[str]:
        """Split text into non-empty, stripped paragraphs on blank lines."""
        paragraphs = re.split(r"\n\s*\n", text)
        return [p.strip() for p in paragraphs if p.strip()]

    def _count_tokens(self, text: str) -> int:
        """Approximate token count (1 token ≈ 4 chars)."""
        return len(text) // 4

    def _get_overlap(self, chunks: List[str]) -> str:
        """Return trailing words of the last chunk to seed the next chunk."""
        if not chunks:
            return ""

        last_chunk = chunks[-1]
        tokens = last_chunk.split()
        # Rough words-from-tokens estimate; deliberately approximate.
        overlap_tokens = int(self.chunk_overlap * 0.25)

        if len(tokens) <= overlap_tokens:
            return last_chunk

        return " ".join(tokens[-overlap_tokens:])
app/application/use_cases/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Use cases"""
app/application/use_cases/document_indexing.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Application Layer - Document Indexing Use Case
3
+
4
+ Handles document upload and indexing into the knowledge base.
5
+ """
6
+ import hashlib
7
+ from pathlib import Path
8
+ from typing import List
9
+ from uuid import uuid4
10
+
11
+ from app.application.dto import DocumentDTO, DocumentUploadDTO
12
+ from app.domain.entities import Document, DocumentChunk, DocumentStatus, DocumentType
13
+ from app.domain.interfaces import IChunkRepository, IDocumentRepository, IEmbedder
14
+
15
+
16
class DocumentIndexingUseCase:
    """Use case for indexing documents into the knowledge base.

    Pipeline: persist document -> mark processing -> extract text ->
    chunk -> embed -> persist chunks -> mark indexed (or failed on error).
    """

    def __init__(
        self,
        document_repository: IDocumentRepository,
        chunk_repository: IChunkRepository,
        embedder: IEmbedder,
        chunking_service: "ChunkingService",
    ):
        self.document_repository = document_repository
        self.chunk_repository = chunk_repository
        self.embedder = embedder
        self.chunking_service = chunking_service

    async def execute(self, upload_dto: DocumentUploadDTO) -> DocumentDTO:
        """Run the full indexing pipeline for one uploaded document."""

        # 1. Detect file type from the filename extension
        file_type = self._detect_file_type(upload_dto.filename)

        # 2. Create document entity
        document = Document(
            title=self._extract_title(upload_dto.filename),
            filename=upload_dto.filename,
            file_type=file_type,
            file_size=len(upload_dto.content),
            storage_path=self._generate_storage_path(upload_dto.filename),
            department=upload_dto.department,
            metadata=upload_dto.metadata,
        )

        # 3. Save document to repository
        saved_document = await self.document_repository.create(document)

        # 4. Mark as processing so the state change is visible immediately
        saved_document.mark_as_processing()
        await self.document_repository.update(saved_document)

        try:
            # 5. Extract text content
            text_content = await self._extract_text(upload_dto.content, file_type)

            # 6. Chunk the document
            chunks_data = await self.chunking_service.chunk_text(
                text=text_content, document_id=saved_document.id, metadata=upload_dto.metadata
            )

            # 7. Generate embeddings
            texts = [chunk.content for chunk in chunks_data]
            embeddings = await self.embedder.embed_texts(texts)
            # NOTE(review): the embeddings are computed but not upserted
            # here — the comment below defers vector storage to the
            # infrastructure layer; confirm something actually consumes them.

            # 8. Store chunks with embeddings
            # (Vector storage will be handled in infrastructure layer)
            chunks = await self.chunk_repository.create_bulk(chunks_data)

            # 9. Mark document as indexed
            saved_document.mark_as_indexed()
            await self.document_repository.update(saved_document)

            # 10. Return DTO
            return self._to_dto(saved_document)

        except Exception:
            # Mark as failed before propagating, so the document never
            # stays stuck in the "processing" state.
            saved_document.mark_as_failed()
            await self.document_repository.update(saved_document)
            raise

    def _detect_file_type(self, filename: str) -> DocumentType:
        """Map a filename extension to a DocumentType (default: TXT)."""
        suffix = Path(filename).suffix.lower()
        type_map = {
            ".pdf": DocumentType.PDF,
            ".docx": DocumentType.DOCX,
            ".txt": DocumentType.TXT,
            ".md": DocumentType.MD,
            ".html": DocumentType.HTML,
        }
        return type_map.get(suffix, DocumentType.TXT)

    def _extract_title(self, filename: str) -> str:
        """Derive a human-readable title from the filename stem."""
        return Path(filename).stem.replace("_", " ").replace("-", " ").title()

    def _generate_storage_path(self, filename: str) -> str:
        """Generate a unique storage path for *filename*.

        FIX: the filename parameter was previously ignored (placeholder
        text in the hash input and path). A fresh UUID is hashed together
        with the filename so identical filenames never collide; the 2-char
        prefix fans files out across subdirectories.
        """
        file_hash = hashlib.md5(f"{uuid4()}{filename}".encode()).hexdigest()
        return f"documents/{file_hash[:2]}/{file_hash}/{filename}"

    async def _extract_text(self, content: bytes, file_type: DocumentType) -> str:
        """Extract text from raw document bytes.

        Simplified placeholder — production should use proper parsers
        (PyPDF2, python-docx, BeautifulSoup, etc.).
        """
        if file_type == DocumentType.TXT or file_type == DocumentType.MD:
            return content.decode("utf-8")
        else:
            # Best-effort decode for binary formats until real extraction exists.
            return content.decode("utf-8", errors="ignore")

    def _to_dto(self, document: Document) -> DocumentDTO:
        """Convert a Document entity to its API-facing DTO."""
        return DocumentDTO(
            id=str(document.id),
            title=document.title,
            filename=document.filename,
            file_type=document.file_type.value,
            file_size=document.file_size,
            department=document.department,
            status=document.status.value,
            uploaded_at=document.uploaded_at.isoformat(),
            indexed_at=document.indexed_at.isoformat() if document.indexed_at else None,
            metadata=document.metadata,
        )
app/application/use_cases/query_processing.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Application Layer - Query Processing Use Case
3
+
4
+ Orchestrates the RAG pipeline for answering user queries.
5
+ """
6
+ import time
7
+ from typing import List
8
+
9
+ from app.application.dto import QueryDTO, QueryResponseDTO, SourceDTO
10
+ from app.domain.entities import Query, QueryRequest, Source
11
+ from app.domain.interfaces import ILLM, ICache, IPromptBuilder, IReranker, IRetriever
12
+
13
+
14
class QueryProcessingUseCase:
    """Use case for processing user queries through the RAG pipeline.

    Pipeline: cache lookup -> hybrid retrieval -> reranking -> prompt
    building -> LLM generation -> response assembly -> cache store.
    """

    def __init__(
        self,
        retriever: IRetriever,
        reranker: IReranker,
        llm: ILLM,
        prompt_builder: IPromptBuilder,
        cache: ICache,
    ):
        self.retriever = retriever
        self.reranker = reranker
        self.llm = llm
        self.prompt_builder = prompt_builder
        self.cache = cache

    async def execute(self, query_dto: QueryDTO) -> QueryResponseDTO:
        """Execute the query processing pipeline and return the answer DTO."""
        import hashlib  # stdlib; local import keeps module-level deps unchanged

        start_time = time.time()

        # 1. Create the domain-level query request
        query_request = QueryRequest(
            query_text=query_dto.query_text,
            department=query_dto.department,
            user_id=query_dto.user_id,
            session_id=query_dto.session_id,
            top_k=query_dto.top_k,
            temperature=query_dto.temperature,
            max_tokens=query_dto.max_tokens,
            filters=query_dto.filters,
        )

        # 2. Check semantic cache.
        # FIX: built-in hash() is salted per process (PYTHONHASHSEED), so
        # its value differs across workers and restarts, making cached
        # entries unreachable. Use a deterministic digest instead.
        digest = hashlib.sha256(query_dto.query_text.encode("utf-8")).hexdigest()
        cache_key = f"query:{digest}:{query_dto.department}"
        cached_response = await self.cache.get(cache_key)
        if cached_response:
            return cached_response

        # 3. Retrieve relevant documents, always scoped to the department
        filters = {"department": query_dto.department}
        if query_dto.filters:
            filters.update(query_dto.filters)

        retrieval_results = await self.retriever.hybrid_search(
            query=query_dto.query_text,
            top_k=100,  # wide initial retrieval; the reranker narrows it below
            alpha=0.5,  # equal weighting of dense and sparse scores
            filters=filters,
        )

        # 4. Rerank and keep only the caller-requested top_k
        reranked_results = await self.reranker.rerank(
            query=query_dto.query_text, results=retrieval_results, top_k=query_dto.top_k
        )

        # 5. Build context passages for the prompt
        context = [result.content for result in reranked_results]

        # 6. Build prompt with a department-specific system message
        messages = self.prompt_builder.build_rag_prompt(
            query=query_dto.query_text,
            context=context,
            system_prompt=self._get_system_prompt(query_dto.department),
        )

        # 7. Generate answer
        llm_response = await self.llm.generate(
            messages=messages,
            temperature=query_dto.temperature,
            max_tokens=query_dto.max_tokens,
        )

        # 8. Create source citations (content truncated for response size)
        sources = [
            SourceDTO(
                title=f"Document {result.document_id}",
                content=result.content[:500],  # Truncate for response
                relevance_score=result.score,
                document_id=result.document_id,
                chunk_index=result.chunk_index,
                metadata=result.metadata,
            )
            for result in reranked_results
        ]

        # 9. Calculate metrics
        processing_time_ms = int((time.time() - start_time) * 1000)

        # 10. Build response
        response = QueryResponseDTO(
            query_id=str(query_request.id) if hasattr(query_request, "id") else "temp",
            answer=llm_response.content,
            sources=sources,
            confidence=self._calculate_confidence(reranked_results),
            processing_time_ms=processing_time_ms,
            tokens_used=llm_response.tokens_used,
            model=llm_response.model,
        )

        # 11. Cache response for one hour.
        # NOTE(review): the DTO object is stored as-is; assumes the ICache
        # implementation handles serialization — confirm against redis_cache.
        await self.cache.set(cache_key, response, ttl=3600)

        return response

    def _get_system_prompt(self, department: str) -> str:
        """Return the department-specific system prompt (General fallback)."""
        prompts = {
            "HR": "You are a helpful HR assistant for employee onboarding. Provide clear, accurate information about HR policies, benefits, and procedures.",
            "IT": "You are an IT support assistant for new employees. Help with technical setup, access, and IT policies.",
            "Legal": "You are a legal compliance assistant. Provide information about legal policies, regulations, and compliance requirements.",
            "Finance": "You are a finance assistant. Help with expense policies, financial procedures, and budget information.",
            "General": "You are a helpful corporate onboarding assistant. Provide accurate information to help new employees integrate successfully.",
        }
        return prompts.get(department, prompts["General"])

    def _calculate_confidence(self, results: List) -> float:
        """Confidence = mean relevance score of the top 3 results (0.0 if none)."""
        if not results:
            return 0.0
        top_scores = [r.score for r in results[:3]]
        return sum(top_scores) / len(top_scores) if top_scores else 0.0
app/core/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Core utilities"""
app/core/config.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core - Configuration Management
3
+
4
+ Handles application configuration using Pydantic Settings.
5
+ """
6
+ from functools import lru_cache
7
+ from typing import List
8
+
9
+ from pydantic import Field
10
+ from pydantic_settings import BaseSettings, SettingsConfigDict
11
+
12
+
13
class Settings(BaseSettings):
    """Application settings.

    Values are loaded (case-insensitively) from environment variables and a
    local ``.env`` file; every field below is a development default.
    """

    model_config = SettingsConfigDict(env_file=".env", case_sensitive=False)

    # Application
    app_name: str = Field(default="RAG Onboarding Backend")
    app_version: str = Field(default="1.0.0")
    environment: str = Field(default="development")
    debug: bool = Field(default=True)
    log_level: str = Field(default="INFO")

    # Server
    host: str = Field(default="0.0.0.0")
    port: int = Field(default=8000)
    workers: int = Field(default=4)

    # Database (async driver; see alembic.ini for the migration URL)
    database_url: str = Field(
        default="postgresql+asyncpg://postgres:postgres@localhost:5432/rag_onboarding"
    )
    database_pool_size: int = Field(default=20)
    database_max_overflow: int = Field(default=0)

    # Redis
    redis_url: str = Field(default="redis://localhost:6379/0")
    redis_cache_ttl: int = Field(default=3600)

    # Qdrant (vector size must match embedding_dimension below)
    qdrant_url: str = Field(default="http://localhost:6333")
    qdrant_api_key: str = Field(default="")
    qdrant_collection_name: str = Field(default="onboarding_documents")
    qdrant_vector_size: int = Field(default=384)

    # RabbitMQ / Celery
    rabbitmq_url: str = Field(default="amqp://guest:guest@localhost:5672/")
    celery_broker_url: str = Field(default="redis://localhost:6379/1")
    celery_result_backend: str = Field(default="redis://localhost:6379/2")

    # Gemini (primary LLM)
    gemini_api_key: str = Field(default="")
    gemini_model: str = Field(default="gemini-2.0-flash")
    gemini_temperature: float = Field(default=0.7)
    gemini_max_tokens: int = Field(default=2048)

    # OpenAI (fallback)
    openai_api_key: str = Field(default="")
    openai_model: str = Field(default="gpt-4-turbo-preview")

    # Embeddings
    embedding_model: str = Field(default="sentence-transformers/all-MiniLM-L6-v2")
    embedding_dimension: int = Field(default=384)
    embedding_batch_size: int = Field(default=32)

    # RAG Configuration
    rag_initial_k: int = Field(default=100)
    rag_final_k: int = Field(default=10)
    rag_min_score: float = Field(default=0.7)
    rag_search_type: str = Field(default="hybrid")
    rag_hybrid_alpha: float = Field(default=0.5)
    rag_max_context_tokens: int = Field(default=4000)

    # Reranking
    rerank_model: str = Field(default="cross-encoder/ms-marco-MiniLM-L-12-v2")
    use_reranking: bool = Field(default=True)

    # Caching (TTLs in seconds)
    enable_semantic_cache: bool = Field(default=True)
    cache_embedding_ttl: int = Field(default=86400)
    cache_retrieval_ttl: int = Field(default=3600)
    cache_generation_ttl: int = Field(default=1800)

    # Circuit Breaker
    circuit_breaker_failure_threshold: int = Field(default=5)
    circuit_breaker_recovery_timeout: int = Field(default=60)

    # Retry Policy
    retry_max_attempts: int = Field(default=3)
    retry_wait_exponential_multiplier: int = Field(default=1)
    retry_wait_exponential_max: int = Field(default=10)

    # Rate Limiting
    rate_limit_enabled: bool = Field(default=True)
    rate_limit_per_minute: int = Field(default=60)
    rate_limit_per_hour: int = Field(default=1000)

    # Monitoring
    prometheus_port: int = Field(default=9090)
    enable_tracing: bool = Field(default=True)
    jaeger_agent_host: str = Field(default="localhost")
    jaeger_agent_port: int = Field(default=6831)
    trace_sample_rate: float = Field(default=0.1)

    # CORS
    cors_origins: List[str] = Field(
        default=["http://localhost:3000", "http://localhost:8000"]
    )
    cors_allow_credentials: bool = Field(default=True)

    # Security
    # NOTE(review): insecure placeholder default — must be overridden via
    # environment in any deployed environment.
    secret_key: str = Field(default="your-secret-key-change-in-production")
    algorithm: str = Field(default="HS256")
    access_token_expire_minutes: int = Field(default=30)


@lru_cache
def get_settings() -> Settings:
    """Return the process-wide Settings singleton (cached after first call)."""
    return Settings()
app/core/logging.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core - Structured Logging
3
+
4
+ JSON structured logging with correlation IDs.
5
+ """
6
+ import logging
7
+ import sys
8
+ from contextvars import ContextVar
9
+ from datetime import datetime
10
+ from typing import Any, Dict
11
+
12
+ import structlog
13
+
14
+ # Correlation ID context variable
15
+ correlation_id_var: ContextVar[str] = ContextVar("correlation_id", default="")
16
+
17
+
18
def get_correlation_id() -> str:
    """Return the correlation ID bound to the current context ("" if unset)."""
    return correlation_id_var.get()
21
+
22
+
23
def set_correlation_id(correlation_id: str) -> None:
    """Bind *correlation_id* to the current execution context.

    Uses a ContextVar, so the value is isolated per task/request.
    """
    correlation_id_var.set(correlation_id)
26
+
27
+
28
def add_correlation_id(logger: Any, method_name: str, event_dict: Dict) -> Dict:
    """Structlog processor: attach the context-local correlation ID to the event.

    The ID is "" when no request context has set one.
    """
    event_dict["correlation_id"] = get_correlation_id()
    return event_dict
32
+
33
+
34
def add_service_info(logger: Any, method_name: str, event_dict: Dict) -> Dict:
    """Structlog processor: stamp every log event with static service metadata.

    Mutates and returns *event_dict*, per the structlog processor contract.
    """
    event_dict.update(service="rag-onboarding-backend", version="1.0.0")
    return event_dict
39
+
40
+
41
def setup_logging(log_level: str = "INFO") -> None:
    """Configure structlog + stdlib logging for JSON output on stdout.

    Every event is enriched with a timestamp, logger name, level,
    correlation ID and static service info, then rendered as one JSON
    object per line. Call once at application startup.
    """

    # Configure structlog; processor order matters — enrichment first,
    # JSONRenderer last (it consumes the event dict).
    structlog.configure(
        processors=[
            structlog.contextvars.merge_contextvars,
            structlog.stdlib.filter_by_level,
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.stdlib.add_logger_name,
            structlog.stdlib.add_log_level,
            structlog.processors.StackInfoRenderer(),
            add_correlation_id,  # custom: request-scoped correlation ID
            add_service_info,    # custom: static service name/version
            structlog.processors.format_exc_info,
            structlog.processors.UnicodeDecoder(),
            structlog.processors.JSONRenderer(),
        ],
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    # Route plain stdlib logging through the same stream/level; structlog
    # renders the message, so the stdlib format is just "%(message)s".
    logging.basicConfig(
        format="%(message)s",
        stream=sys.stdout,
        level=getattr(logging, log_level.upper()),
    )
71
+
72
+
73
def get_logger(name: str) -> structlog.stdlib.BoundLogger:
    """Return a structlog logger configured by setup_logging().

    Typical usage: ``logger = get_logger(__name__)`` at module level.
    """
    return structlog.get_logger(name)
app/core/metrics.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core - Prometheus Metrics
3
+
4
+ Application metrics for monitoring.
5
+ """
6
+ from prometheus_client import Counter, Gauge, Histogram
7
+
8
# Request metrics — labeled per endpoint so dashboards can break down traffic.
http_requests_total = Counter(
    "http_requests_total",
    "Total HTTP requests",
    ["method", "endpoint", "status"],
)

http_request_duration_seconds = Histogram(
    "http_request_duration_seconds",
    "HTTP request duration in seconds",
    ["method", "endpoint"],
    buckets=[0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0],
)

# RAG Pipeline metrics — one histogram per pipeline phase.
rag_retrieval_duration_seconds = Histogram(
    "rag_retrieval_duration_seconds",
    "RAG retrieval phase duration",
    ["strategy"],
    buckets=[0.01, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0],
)

rag_reranking_duration_seconds = Histogram(
    "rag_reranking_duration_seconds",
    "RAG reranking phase duration",
    buckets=[0.01, 0.05, 0.1, 0.2, 0.5, 1.0],
)

# Generation is the slowest phase, hence the wider buckets.
llm_generation_duration_seconds = Histogram(
    "llm_generation_duration_seconds",
    "LLM generation duration",
    ["model"],
    buckets=[0.5, 1.0, 2.0, 5.0, 10.0, 20.0],
)

llm_tokens_used_total = Counter(
    "llm_tokens_used_total",
    "Total LLM tokens used",
    ["model", "type"],  # type: prompt, completion
)

# Cache metrics — hit rate per cache tier.
cache_hits_total = Counter(
    "cache_hits_total",
    "Total cache hits",
    ["cache_type"],  # embedding, retrieval, generation
)

cache_misses_total = Counter(
    "cache_misses_total",
    "Total cache misses",
    ["cache_type"],
)

# Business metrics
queries_total = Counter(
    "queries_total",
    "Total queries processed",
    ["department", "status"],
)

documents_indexed_total = Counter(
    "documents_indexed_total",
    "Total documents indexed",
    ["department", "file_type"],
)

# Confidence is expected in [0, 1]; buckets cover the full range evenly.
query_confidence_score = Histogram(
    "query_confidence_score",
    "Query confidence scores",
    ["department"],
    buckets=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
)

# System metrics (gauges: current values, not monotonic counters)
active_requests = Gauge(
    "active_requests",
    "Number of active requests",
)

database_connections_active = Gauge(
    "database_connections_active",
    "Active database connections",
)

# Error metrics
errors_total = Counter(
    "errors_total",
    "Total errors",
    ["error_type", "component"],
)
app/domain/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Domain entities"""
app/domain/entities/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Domain entities"""
2
+
3
+ from app.domain.entities.document import Document, DocumentChunk, DocumentStatus, DocumentType
4
+ from app.domain.entities.query import Query, QueryRequest, QueryStatus, Source
5
+
6
+ __all__ = [
7
+ "Document",
8
+ "DocumentChunk",
9
+ "DocumentStatus",
10
+ "DocumentType",
11
+ "Query",
12
+ "QueryRequest",
13
+ "QueryStatus",
14
+ "Source",
15
+ ]
app/domain/entities/document.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Domain Layer - Document Entity
3
+
4
+ Represents a document in the knowledge base.
5
+ """
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+ from enum import Enum
9
+ from typing import Optional
10
+ from uuid import UUID, uuid4
11
+
12
+
13
class DocumentStatus(str, Enum):
    """Document processing status.

    Lifecycle: PENDING -> PROCESSING -> INDEXED (success) or FAILED.
    Inherits str so values serialize directly to JSON/DB columns.
    """

    PENDING = "pending"
    PROCESSING = "processing"
    INDEXED = "indexed"
    FAILED = "failed"
20
+
21
+
22
class DocumentType(str, Enum):
    """Supported document types (values match lowercase file extensions)."""

    PDF = "pdf"
    DOCX = "docx"
    TXT = "txt"
    MD = "md"
    HTML = "html"
30
+
31
+
32
@dataclass
class Document:
    """Document entity - core business object.

    Tracks a file through the indexing lifecycle:
    PENDING -> PROCESSING -> INDEXED (or FAILED).
    """

    title: str
    filename: str
    file_type: DocumentType
    file_size: int
    storage_path: str
    department: str
    id: UUID = field(default_factory=uuid4)
    status: DocumentStatus = DocumentStatus.PENDING
    upload_session_id: Optional[str] = None
    uploaded_at: datetime = field(default_factory=datetime.utcnow)
    indexed_at: Optional[datetime] = None
    metadata: dict = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)

    def _transition(self, new_status: DocumentStatus) -> None:
        """Apply a status change and bump the update timestamp."""
        self.status = new_status
        self.updated_at = datetime.utcnow()

    def mark_as_processing(self) -> None:
        """Mark document as being processed."""
        self._transition(DocumentStatus.PROCESSING)

    def mark_as_indexed(self) -> None:
        """Mark document as successfully indexed (records indexing time)."""
        self.indexed_at = datetime.utcnow()
        self._transition(DocumentStatus.INDEXED)

    def mark_as_failed(self) -> None:
        """Mark document processing as failed."""
        self._transition(DocumentStatus.FAILED)

    def is_indexed(self) -> bool:
        """True once the document has been successfully indexed."""
        return self.status == DocumentStatus.INDEXED
70
+
71
+
72
@dataclass
class DocumentChunk:
    """Document chunk - piece of a document used for vector search."""

    document_id: UUID
    chunk_index: int
    content: str
    token_count: int
    id: UUID = field(default_factory=uuid4)
    vector_id: Optional[str] = None
    metadata: dict = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.utcnow)

    def set_vector_id(self, vector_id: str) -> None:
        """Record the ID of this chunk's point in the vector store."""
        self.vector_id = vector_id
app/domain/entities/query.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Domain Layer - Query Entity
3
+
4
+ Represents a user query and its result.
5
+ """
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+ from enum import Enum
9
+ from typing import List, Optional
10
+ from uuid import UUID, uuid4
11
+
12
+
13
class QueryStatus(str, Enum):
    """Query processing status.

    Lifecycle: PENDING -> PROCESSING -> COMPLETED or FAILED.
    Inherits str so values serialize directly to JSON/DB columns.
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
20
+
21
+
22
@dataclass
class Source:
    """Retrieved source/citation backing a query answer."""

    title: str
    content: str
    relevance_score: float
    document_id: UUID
    chunk_index: int
    metadata: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a JSON-friendly dict (UUID rendered as str)."""
        payload = dict(
            title=self.title,
            content=self.content,
            relevance_score=self.relevance_score,
            document_id=str(self.document_id),
            chunk_index=self.chunk_index,
            metadata=self.metadata,
        )
        return payload
43
+
44
+
45
@dataclass
class Query:
    """Query entity - represents one user question and its lifecycle.

    Created PENDING, moved to PROCESSING, and finally COMPLETED (with
    answer, sources and stats) or FAILED.
    """

    query_text: str
    department: str
    user_id: Optional[str] = None
    session_id: Optional[str] = None
    id: UUID = field(default_factory=uuid4)
    status: QueryStatus = QueryStatus.PENDING
    answer: Optional[str] = None  # populated only on completion
    sources: List[Source] = field(default_factory=list)
    confidence: float = 0.0  # answer confidence score
    duration_ms: int = 0  # end-to-end processing time
    tokens_used: int = 0
    model: Optional[str] = None  # LLM model that produced the answer
    created_at: datetime = field(default_factory=datetime.utcnow)
    completed_at: Optional[datetime] = None

    def mark_as_processing(self) -> None:
        """Mark query as being processed."""
        self.status = QueryStatus.PROCESSING

    def mark_as_completed(
        self,
        answer: str,
        sources: List[Source],
        confidence: float,
        duration_ms: int,
        tokens_used: int,
        model: str,
    ) -> None:
        """Mark query as completed, recording the answer and its stats."""
        self.status = QueryStatus.COMPLETED
        self.answer = answer
        self.sources = sources
        self.confidence = confidence
        self.duration_ms = duration_ms
        self.tokens_used = tokens_used
        self.model = model
        self.completed_at = datetime.utcnow()

    def mark_as_failed(self) -> None:
        """Mark query as failed (completed_at still records when it ended)."""
        self.status = QueryStatus.FAILED
        self.completed_at = datetime.utcnow()
91
+
92
+
93
@dataclass
class QueryRequest:
    """Query request from user - value object.

    Validated on construction: raises ValueError for empty/blank text or
    out-of-range tuning parameters.
    """

    query_text: str
    department: str
    user_id: Optional[str] = None
    session_id: Optional[str] = None
    top_k: int = 10
    temperature: float = 0.7
    max_tokens: int = 2048
    filters: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate the request; raise ValueError on any bad field."""
        if not self.query_text or not self.query_text.strip():
            raise ValueError("query_text cannot be empty")
        if not 1 <= self.top_k <= 50:
            raise ValueError("top_k must be between 1 and 50")
        if not 0 <= self.temperature <= 1:
            raise ValueError("temperature must be between 0 and 1")
app/domain/interfaces/__init__.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Domain interfaces"""
2
+
3
+ from app.domain.interfaces.cache import ICache
4
+ from app.domain.interfaces.llm import ILLM, IPromptBuilder, LLMMessage, LLMResponse
5
+ from app.domain.interfaces.repository import IChunkRepository, IDocumentRepository
6
+ from app.domain.interfaces.retriever import IEmbedder, IReranker, IRetriever, RetrievalResult
7
+
8
+ __all__ = [
9
+ "ICache",
10
+ "IChunkRepository",
11
+ "IDocumentRepository",
12
+ "IEmbedder",
13
+ "ILLM",
14
+ "IPromptBuilder",
15
+ "IReranker",
16
+ "IRetriever",
17
+ "LLMMessage",
18
+ "LLMResponse",
19
+ "RetrievalResult",
20
+ ]
app/domain/interfaces/cache.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Domain Layer - Cache Interface
3
+
4
+ Defines contract for caching implementations.
5
+ """
6
+ from abc import ABC, abstractmethod
7
+ from typing import Any, Optional
8
+
9
+
10
class ICache(ABC):
    """Interface (port) for cache implementations.

    All operations are async; implementations own serialization and
    connection management.
    """

    @abstractmethod
    async def get(self, key: str) -> Optional[Any]:
        """Return the cached value for *key*, or None on a miss."""
        pass

    @abstractmethod
    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool:
        """Store *value* under *key*; *ttl* is seconds (None = no expiry)."""
        pass

    @abstractmethod
    async def delete(self, key: str) -> bool:
        """Delete *key*; True if a key was actually removed."""
        pass

    @abstractmethod
    async def exists(self, key: str) -> bool:
        """Check whether *key* is present."""
        pass

    @abstractmethod
    async def clear(self, pattern: str = "*") -> int:
        """Delete all keys matching *pattern*; return the number removed."""
        pass
app/domain/interfaces/llm.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Domain Layer - LLM Interface
3
+
4
+ Defines contract for LLM implementations.
5
+ """
6
+ from abc import ABC, abstractmethod
7
+ from dataclasses import dataclass
8
+ from typing import AsyncIterator, List, Optional
9
+
10
+
11
@dataclass
class LLMMessage:
    """One chat message in an LLM conversation."""

    role: str  # one of: system, user, assistant
    content: str
17
+
18
+
19
@dataclass
class LLMResponse:
    """Result of one LLM generation call."""

    content: str  # generated text
    model: str  # model identifier that produced the response
    tokens_used: int  # prompt + completion tokens (may be approximate)
    finish_reason: str  # why generation stopped (e.g. "stop")
27
+
28
+
29
class ILLM(ABC):
    """Interface (port) for LLM implementations."""

    @abstractmethod
    async def generate(
        self,
        messages: List[LLMMessage],
        temperature: float = 0.7,
        max_tokens: int = 2048,
        stream: bool = False,
    ) -> LLMResponse:
        """Generate a complete response for *messages*."""
        pass

    @abstractmethod
    async def generate_stream(
        self,
        messages: List[LLMMessage],
        temperature: float = 0.7,
        max_tokens: int = 2048,
    ) -> AsyncIterator[str]:
        """Yield the response incrementally as text chunks."""
        pass

    @abstractmethod
    def get_model_name(self) -> str:
        """Return the underlying model identifier."""
        pass
57
+
58
+
59
class IPromptBuilder(ABC):
    """Interface for assembling LLM prompts from RAG inputs."""

    @abstractmethod
    def build_rag_prompt(
        self, query: str, context: List[str], system_prompt: Optional[str] = None
    ) -> List[LLMMessage]:
        """Build the RAG prompt from the user *query* and retrieved *context*."""
        pass

    @abstractmethod
    def build_query_expansion_prompt(self, query: str) -> List[LLMMessage]:
        """Build a prompt asking the LLM for alternative phrasings of *query*."""
        pass
app/domain/interfaces/repository.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Domain Layer - Interfaces (Ports)
3
+
4
+ Defines contracts for infrastructure implementations.
5
+ """
6
+ from abc import ABC, abstractmethod
7
+ from typing import List, Optional
8
+ from uuid import UUID
9
+
10
+ from app.domain.entities import Document, DocumentChunk
11
+
12
+
13
class IDocumentRepository(ABC):
    """Repository interface for document persistence."""

    @abstractmethod
    async def create(self, document: Document) -> Document:
        """Persist a new document and return the stored entity."""
        pass

    @abstractmethod
    async def get_by_id(self, document_id: UUID) -> Optional[Document]:
        """Fetch a document by ID; None if it does not exist."""
        pass

    @abstractmethod
    async def update(self, document: Document) -> Document:
        """Persist changes to an existing document."""
        pass

    @abstractmethod
    async def list_by_department(
        self, department: str, skip: int = 0, limit: int = 100
    ) -> List[Document]:
        """List a department's documents with skip/limit pagination."""
        pass

    @abstractmethod
    async def delete(self, document_id: UUID) -> bool:
        """Delete a document; True if one was removed."""
        pass
42
+
43
+
44
class IChunkRepository(ABC):
    """Repository interface for document chunks."""

    @abstractmethod
    async def create_bulk(self, chunks: List[DocumentChunk]) -> List[DocumentChunk]:
        """Persist many chunks in one operation and return them."""
        pass

    @abstractmethod
    async def get_by_document_id(self, document_id: UUID) -> List[DocumentChunk]:
        """Return every chunk belonging to one document."""
        pass

    @abstractmethod
    async def delete_by_document_id(self, document_id: UUID) -> int:
        """Delete all of a document's chunks; return the number removed."""
        pass
app/domain/interfaces/retriever.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Domain Layer - Retriever Interface
3
+
4
+ Defines contract for document retrieval implementations.
5
+ """
6
+ from abc import ABC, abstractmethod
7
+ from dataclasses import dataclass
8
+ from typing import List, Optional
9
+
10
+
11
@dataclass
class RetrievalResult:
    """Single retrieval hit from the vector store."""

    content: str  # chunk text
    score: float  # similarity/relevance score from the retriever
    document_id: str  # owning document (string form)
    chunk_index: int  # position of the chunk within its document
    metadata: dict  # arbitrary payload carried by the stored point
20
+
21
+
22
class IRetriever(ABC):
    """Interface (port) for document retrieval."""

    @abstractmethod
    async def search(
        self,
        query: str,
        top_k: int = 10,
        filters: Optional[dict] = None,
        min_score: float = 0.0,
    ) -> List[RetrievalResult]:
        """Semantic search; drop hits scoring below *min_score*."""
        pass

    @abstractmethod
    async def hybrid_search(
        self,
        query: str,
        top_k: int = 10,
        alpha: float = 0.5,
        filters: Optional[dict] = None,
    ) -> List[RetrievalResult]:
        """Hybrid (semantic + keyword) search; *alpha* weights the blend."""
        pass
46
+
47
+
48
class IReranker(ABC):
    """Interface for reranking first-pass retrieval results."""

    @abstractmethod
    async def rerank(
        self, query: str, results: List[RetrievalResult], top_k: int = 10
    ) -> List[RetrievalResult]:
        """Reorder *results* by relevance to *query*; keep the best *top_k*."""
        pass
57
+
58
+
59
class IEmbedder(ABC):
    """Interface for text embedding."""

    @abstractmethod
    async def embed_text(self, text: str) -> List[float]:
        """Embed a single text; returns a vector of get_dimension() floats."""
        pass

    @abstractmethod
    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Embed many texts at once (batching is the implementation's job)."""
        pass

    @abstractmethod
    def get_dimension(self) -> int:
        """Return the embedding vector width."""
        pass
app/infrastructure/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Infrastructure layer"""
app/infrastructure/cache/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Cache implementations"""
app/infrastructure/cache/redis_cache.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Infrastructure - Redis Cache Implementation
3
+ """
4
+ import json
5
+ from typing import Any, Optional
6
+
7
+ import redis.asyncio as redis
8
+
9
+ from app.domain.interfaces import ICache
10
+
11
+
12
class RedisCache(ICache):
    """Redis cache implementation (lazy connection, JSON value handling)."""

    def __init__(self, redis_url: str):
        self.redis_url = redis_url
        self._client: Optional[redis.Redis] = None

    async def _get_client(self) -> redis.Redis:
        """Lazily create and memoize the Redis client."""
        if self._client is None:
            self._client = await redis.from_url(
                self.redis_url, encoding="utf-8", decode_responses=True
            )
        return self._client

    async def get(self, key: str) -> Optional[Any]:
        """Get a value; JSON-decoded when possible, raw string otherwise."""
        client = await self._get_client()
        raw = await client.get(key)
        if raw is None:
            return None
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            # Value was stored as a plain string — return it as-is.
            return raw

    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool:
        """Store a value, optionally with a TTL in seconds."""
        client = await self._get_client()
        # Containers go through JSON; everything else is stringified.
        serialized = json.dumps(value) if isinstance(value, (dict, list)) else str(value)
        if ttl:
            await client.setex(key, ttl, serialized)
        else:
            await client.set(key, serialized)
        return True

    async def delete(self, key: str) -> bool:
        """Delete a key; True if one was removed."""
        client = await self._get_client()
        removed = await client.delete(key)
        return removed > 0

    async def exists(self, key: str) -> bool:
        """Check whether a key is present."""
        client = await self._get_client()
        count = await client.exists(key)
        return count > 0

    async def clear(self, pattern: str = "*") -> int:
        """Delete every key matching *pattern*; return the number removed."""
        client = await self._get_client()
        matched = [key async for key in client.scan_iter(match=pattern)]
        if not matched:
            return 0
        return await client.delete(*matched)

    async def close(self) -> None:
        """Close the Redis connection if one was opened."""
        if self._client:
            await self._client.close()
app/infrastructure/database/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Database infrastructure"""
app/infrastructure/database/models.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Infrastructure - Database Models (SQLAlchemy)
3
+ """
4
+ import uuid
5
+ from datetime import datetime
6
+
7
+ from sqlalchemy import Column, DateTime, Integer, String, Text, BigInteger, Index
8
+ from sqlalchemy.dialects.postgresql import JSONB, UUID
9
+ from sqlalchemy.ext.declarative import declarative_base
10
+
11
+ Base = declarative_base()
12
+
13
+
14
class DocumentModel(Base):
    """Document table model.

    FIX: ``metadata`` is a reserved attribute name in SQLAlchemy's
    declarative API — mapping a column as ``metadata = Column(...)``
    raises InvalidRequestError at import time. The JSONB column keeps
    its physical name "metadata" but is exposed as ``doc_metadata``.
    Mutable column defaults use callables so rows never share one dict.
    """

    __tablename__ = "documents"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    title = Column(String(500), nullable=False, index=True)
    filename = Column(String(255), nullable=False)
    file_type = Column(String(50), nullable=False, index=True)
    file_size = Column(BigInteger, nullable=False)
    storage_path = Column(String(1000), nullable=False)
    department = Column(String(100), nullable=False, index=True)
    status = Column(String(50), nullable=False, default="pending", index=True)
    upload_session_id = Column(String(100), nullable=True)
    uploaded_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False)
    indexed_at = Column(DateTime(timezone=True), nullable=True)
    # Attribute renamed (reserved word); physical column name stays "metadata".
    doc_metadata = Column("metadata", JSONB, default=dict, nullable=False)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False
    )

    __table_args__ = (
        # Composite index for the common "documents of dept X in status Y" query.
        Index("ix_documents_department_status", "department", "status"),
        Index("ix_documents_created_at", "created_at"),
    )
40
+
41
+
42
class DocumentChunkModel(Base):
    """Document chunk table model.

    FIX: ``metadata`` is a reserved attribute name in SQLAlchemy's
    declarative API; the JSONB column is exposed as ``chunk_metadata``
    while keeping the physical column name "metadata". The default uses
    a callable (``dict``) so each row gets its own fresh dict.
    """

    __tablename__ = "document_chunks"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    document_id = Column(UUID(as_uuid=True), nullable=False, index=True)
    chunk_index = Column(Integer, nullable=False)
    content = Column(Text, nullable=False)
    token_count = Column(Integer, nullable=False)
    vector_id = Column(String(100), nullable=True, index=True)
    chunk_metadata = Column("metadata", JSONB, default=dict, nullable=False)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False)

    # Composite index for fetching a document's chunks in order.
    __table_args__ = (Index("ix_chunks_document_id_index", "document_id", "chunk_index"),)
57
+
58
+
59
class QueryModel(Base):
    """Query table model.

    ``sources`` defaults to a callable (``list``) so rows never share a
    single mutable list object as their default.
    """

    __tablename__ = "queries"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    query_text = Column(Text, nullable=False)
    department = Column(String(100), nullable=False, index=True)
    user_id = Column(String(100), nullable=True, index=True)
    session_id = Column(String(100), nullable=True, index=True)
    status = Column(String(50), nullable=False, default="pending", index=True)
    answer = Column(Text, nullable=True)
    sources = Column(JSONB, default=list, nullable=False)
    confidence = Column(Integer, default=0, nullable=False)  # Store as int (0-100)
    duration_ms = Column(Integer, default=0, nullable=False)
    tokens_used = Column(Integer, default=0, nullable=False)
    model = Column(String(100), nullable=True)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False, index=True)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    __table_args__ = (
        # Composite indexes for per-department and per-user history queries.
        Index("ix_queries_department_created", "department", "created_at"),
        Index("ix_queries_user_created", "user_id", "created_at"),
    )
app/infrastructure/external/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """External services"""
app/infrastructure/external/embedder.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Infrastructure - Sentence Transformers Embedding Service
3
+ """
4
+ from typing import List
5
+
6
+ from sentence_transformers import SentenceTransformer
7
+
8
+ from app.domain.interfaces import IEmbedder
9
+
10
+
11
class SentenceTransformerEmbedder(IEmbedder):
    """Sentence Transformers embedding implementation.

    NOTE(review): ``SentenceTransformer.encode`` is called synchronously
    inside these ``async`` methods, so encoding blocks the event loop;
    consider offloading to a thread pool for large batches.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model_name = model_name
        # Loads (and on first use downloads) the model — can be slow at startup.
        self.model = SentenceTransformer(model_name)
        # Vector width reported by the loaded model.
        self.dimension = self.model.get_sentence_embedding_dimension()

    async def embed_text(self, text: str) -> List[float]:
        """Generate embedding for single text."""
        embedding = self.model.encode(text, convert_to_numpy=True)
        return embedding.tolist()

    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for multiple texts in one batched encode call."""
        embeddings = self.model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
        return embeddings.tolist()

    def get_dimension(self) -> int:
        """Get embedding dimension."""
        return self.dimension
app/infrastructure/external/gemini_llm.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Infrastructure - Gemini LLM Service
3
+ """
4
+ from typing import AsyncIterator, List
5
+
6
+ import google.generativeai as genai
7
+
8
+ from app.domain.interfaces import ILLM, LLMMessage, LLMResponse
9
+
10
+
11
class GeminiLLM(ILLM):
    """Gemini LLM implementation of the ILLM interface.

    Messages are flattened into a single "Role: content" text prompt
    rather than using Gemini's structured chat history.
    """

    def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash"):
        # configure() sets the API key globally for the google.generativeai module.
        genai.configure(api_key=api_key)
        self.model_name = model_name
        self.model = genai.GenerativeModel(model_name)

    async def generate(
        self,
        messages: List[LLMMessage],
        temperature: float = 0.7,
        max_tokens: int = 2048,
        stream: bool = False,  # accepted for interface parity but unused here; use generate_stream()
    ) -> LLMResponse:
        """Generate a single (non-streaming) response from Gemini."""

        # Convert messages to Gemini format (one flattened text prompt)
        prompt = self._build_prompt(messages)

        # Generate
        response = await self.model.generate_content_async(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=temperature, max_output_tokens=max_tokens
            ),
        )

        # Count tokens (approximate: whitespace word count, not the model tokenizer)
        tokens_used = len(prompt.split()) + len(response.text.split())

        return LLMResponse(
            content=response.text,
            model=self.model_name,
            tokens_used=tokens_used,
            finish_reason="stop",  # hard-coded; the API's actual finish reason is not propagated
        )

    async def generate_stream(
        self,
        messages: List[LLMMessage],
        temperature: float = 0.7,
        max_tokens: int = 2048,
    ) -> AsyncIterator[str]:
        """Generate a streaming response from Gemini, yielding text chunks."""

        prompt = self._build_prompt(messages)

        response = await self.model.generate_content_async(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=temperature, max_output_tokens=max_tokens
            ),
            stream=True,
        )

        # Yield only non-empty chunks as they arrive.
        async for chunk in response:
            if chunk.text:
                yield chunk.text

    def get_model_name(self) -> str:
        """Get model name."""
        return self.model_name

    def _build_prompt(self, messages: List[LLMMessage]) -> str:
        """Flatten chat messages into one "Role: content" text prompt.

        Messages with unrecognized roles are silently dropped.
        """
        parts = []

        for msg in messages:
            if msg.role == "system":
                parts.append(f"System: {msg.content}")
            elif msg.role == "user":
                parts.append(f"User: {msg.content}")
            elif msg.role == "assistant":
                parts.append(f"Assistant: {msg.content}")

        return "\n\n".join(parts)
app/infrastructure/external/prompt_builder.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Infrastructure - Prompt Builder Implementation
3
+ """
4
+ from typing import List, Optional
5
+
6
+ from app.domain.interfaces import IPromptBuilder, LLMMessage
7
+
8
+
9
class DefaultPromptBuilder(IPromptBuilder):
    """Default prompt builder implementation."""

    def build_rag_prompt(
        self, query: str, context: List[str], system_prompt: Optional[str] = None
    ) -> List[LLMMessage]:
        """Build RAG prompt with query and context.

        Context snippets are numbered ([Context 1], ...) so the model can
        cite them; a grounding system prompt is used unless one is supplied.
        """

        if system_prompt is None:
            system_prompt = """You are a helpful corporate onboarding assistant.
Answer questions based ONLY on the provided context.
If the answer is not in the context, say "I don't have enough information to answer that question."
Always cite your sources by referencing the relevant context sections."""

        # Build context string: numbered snippets separated by "---" dividers
        context_str = "\n\n---\n\n".join(
            [f"[Context {i+1}]\n{ctx}" for i, ctx in enumerate(context)]
        )

        user_message = f"""Context:
{context_str}

Question: {query}

Please provide a clear, accurate answer based on the context above. Include citations."""

        return [
            LLMMessage(role="system", content=system_prompt),
            LLMMessage(role="user", content=user_message),
        ]

    def build_query_expansion_prompt(self, query: str) -> List[LLMMessage]:
        """Build prompt for query expansion (2-3 paraphrases, one per line)."""

        system_prompt = """You are a query expansion expert.
Generate 2-3 alternative phrasings of the user's question to improve retrieval.
Return only the alternative questions, one per line."""

        user_message = f"""Original question: {query}

Generate alternative phrasings:"""

        return [
            LLMMessage(role="system", content=system_prompt),
            LLMMessage(role="user", content=user_message),
        ]
app/infrastructure/external/qdrant_retriever.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Infrastructure - Qdrant Vector Store
"""
from typing import Dict, List, Optional
from uuid import UUID

from qdrant_client import AsyncQdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams, Filter, FieldCondition, MatchValue

from app.domain.interfaces import IRetriever, RetrievalResult


class QdrantRetriever(IRetriever):
    """Vector-store retriever backed by an async Qdrant client."""

    def __init__(
        self,
        url: str,
        collection_name: str,
        vector_size: int = 384,
        api_key: Optional[str] = None,
    ):
        self.url = url
        self.collection_name = collection_name
        self.vector_size = vector_size
        self.client = AsyncQdrantClient(url=url, api_key=api_key)

    async def initialize_collection(self) -> None:
        """Create the collection (cosine distance) if it does not yet exist."""
        existing = await self.client.get_collections()
        known_names = {collection.name for collection in existing.collections}

        if self.collection_name in known_names:
            return
        await self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(size=self.vector_size, distance=Distance.COSINE),
        )

    async def search(
        self,
        query: str,
        top_k: int = 10,
        filters: Optional[dict] = None,
        min_score: float = 0.0,
    ) -> List[RetrievalResult]:
        """Text search is unsupported here: the query must be embedded first."""
        # This implementation only works with pre-computed vectors.
        raise NotImplementedError("Use search_by_vector instead")

    async def search_by_vector(
        self,
        query_vector: List[float],
        top_k: int = 10,
        filters: Optional[dict] = None,
        min_score: float = 0.0,
    ) -> List[RetrievalResult]:
        """Similarity search with a pre-computed query embedding."""

        # Translate the flat key/value filter dict into a Qdrant must-filter.
        search_filter = None
        if filters:
            must_clauses = [
                FieldCondition(key=field, match=MatchValue(value=wanted))
                for field, wanted in filters.items()
            ]
            if must_clauses:
                search_filter = Filter(must=must_clauses)

        hits = await self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=top_k,
            query_filter=search_filter,
            score_threshold=min_score,
        )

        # Map the raw Qdrant hits onto the domain-level result type.
        return [self._hit_to_result(hit) for hit in hits]

    @staticmethod
    def _hit_to_result(hit) -> RetrievalResult:
        """Convert one Qdrant scored point into a RetrievalResult."""
        payload = hit.payload or {}
        return RetrievalResult(
            content=payload.get("content", ""),
            score=hit.score,
            document_id=payload.get("document_id", ""),
            chunk_index=payload.get("chunk_index", 0),
            metadata=payload.get("metadata", {}),
        )

    async def hybrid_search(
        self,
        query: str,
        top_k: int = 10,
        alpha: float = 0.5,
        filters: Optional[dict] = None,
    ) -> List[RetrievalResult]:
        """Hybrid (semantic + keyword) search is not implemented here."""
        # Simplified - in production combine with keyword search.
        raise NotImplementedError("Hybrid search requires integration with keyword search")

    async def upsert_points(
        self,
        points: List[Dict],
    ) -> None:
        """Upsert points into the collection.

        Each dict must carry ``id``, ``vector`` and ``payload`` keys.
        """
        structured = [
            PointStruct(id=item["id"], vector=item["vector"], payload=item["payload"])
            for item in points
        ]

        await self.client.upsert(
            collection_name=self.collection_name,
            points=structured,
        )
app/infrastructure/external/simple_reranker.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Infrastructure - Simple Reranker (Placeholder)

In production, use cross-encoder reranker.
"""
from typing import List

from app.domain.interfaces import IReranker, RetrievalResult


class SimpleReranker(IReranker):
    """Score-order reranker: ignores the query and trusts retrieval scores."""

    async def rerank(
        self, query: str, results: List[RetrievalResult], top_k: int = 10
    ) -> List[RetrievalResult]:
        """Return the top_k results ordered by descending retrieval score.

        The query is intentionally unused; a real reranker would score
        query/result pairs with a cross-encoder.
        """
        return sorted(results, key=lambda item: item.score, reverse=True)[:top_k]
app/infrastructure/repositories/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Repositories"""
app/infrastructure/repositories/postgres_repository.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Infrastructure - PostgreSQL Repository Implementation
"""
from typing import List, Optional
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.domain.entities import Document, DocumentChunk, DocumentStatus, DocumentType
from app.domain.interfaces import IChunkRepository, IDocumentRepository
from app.infrastructure.database.models import DocumentChunkModel, DocumentModel


class PostgresDocumentRepository(IDocumentRepository):
    """Async-SQLAlchemy implementation of the document repository."""

    def __init__(self, session: AsyncSession):
        self.session = session

    async def _fetch(self, document_id: UUID) -> Optional[DocumentModel]:
        """Load the ORM row for *document_id*, or None when absent."""
        lookup = await self.session.execute(
            select(DocumentModel).where(DocumentModel.id == document_id)
        )
        return lookup.scalar_one_or_none()

    async def create(self, document: Document) -> Document:
        """Persist a new document and return the stored entity."""
        row = DocumentModel(
            id=document.id,
            title=document.title,
            filename=document.filename,
            file_type=document.file_type.value,
            file_size=document.file_size,
            storage_path=document.storage_path,
            department=document.department,
            status=document.status.value,
            upload_session_id=document.upload_session_id,
            uploaded_at=document.uploaded_at,
            indexed_at=document.indexed_at,
            metadata=document.metadata,
        )
        self.session.add(row)
        await self.session.commit()
        # Refresh so server-side defaults (created_at/updated_at) are visible.
        await self.session.refresh(row)
        return self._to_entity(row)

    async def get_by_id(self, document_id: UUID) -> Optional[Document]:
        """Get a document by ID, or None when it does not exist."""
        row = await self._fetch(document_id)
        return self._to_entity(row) if row else None

    async def update(self, document: Document) -> Document:
        """Update the mutable fields of an existing document.

        Raises ValueError when no row matches the entity's id.
        """
        row = await self._fetch(document.id)
        if not row:
            raise ValueError(f"Document {document.id} not found")

        row.title = document.title
        row.status = document.status.value
        row.indexed_at = document.indexed_at
        row.metadata = document.metadata
        row.updated_at = document.updated_at

        await self.session.commit()
        await self.session.refresh(row)
        return self._to_entity(row)

    async def list_by_department(
        self, department: str, skip: int = 0, limit: int = 100
    ) -> List[Document]:
        """List a department's documents, newest first, with pagination."""
        lookup = await self.session.execute(
            select(DocumentModel)
            .where(DocumentModel.department == department)
            .offset(skip)
            .limit(limit)
            .order_by(DocumentModel.created_at.desc())
        )
        return [self._to_entity(row) for row in lookup.scalars().all()]

    async def delete(self, document_id: UUID) -> bool:
        """Delete a document; returns False when it was not found."""
        row = await self._fetch(document_id)
        if not row:
            return False

        await self.session.delete(row)
        await self.session.commit()
        return True

    def _to_entity(self, model: DocumentModel) -> Document:
        """Map an ORM row back onto the domain entity."""
        return Document(
            id=model.id,
            title=model.title,
            filename=model.filename,
            file_type=DocumentType(model.file_type),
            file_size=model.file_size,
            storage_path=model.storage_path,
            department=model.department,
            status=DocumentStatus(model.status),
            upload_session_id=model.upload_session_id,
            uploaded_at=model.uploaded_at,
            indexed_at=model.indexed_at,
            metadata=model.metadata,
            created_at=model.created_at,
            updated_at=model.updated_at,
        )
+
class PostgresChunkRepository(IChunkRepository):
    """Async-SQLAlchemy implementation of the chunk repository."""

    def __init__(self, session: AsyncSession):
        self.session = session

    async def create_bulk(self, chunks: List[DocumentChunk]) -> List[DocumentChunk]:
        """Insert all chunks in one flush and return the input entities.

        The caller's entities are returned unchanged: ids are pre-assigned,
        so no refresh round-trip is needed.
        """
        rows = []
        for chunk in chunks:
            rows.append(
                DocumentChunkModel(
                    id=chunk.id,
                    document_id=chunk.document_id,
                    chunk_index=chunk.chunk_index,
                    content=chunk.content,
                    token_count=chunk.token_count,
                    vector_id=chunk.vector_id,
                    metadata=chunk.metadata,
                )
            )

        self.session.add_all(rows)
        await self.session.commit()

        return chunks

    async def get_by_document_id(self, document_id: UUID) -> List[DocumentChunk]:
        """Fetch a document's chunks ordered by their position."""
        lookup = await self.session.execute(
            select(DocumentChunkModel)
            .where(DocumentChunkModel.document_id == document_id)
            .order_by(DocumentChunkModel.chunk_index)
        )
        return [self._to_entity(row) for row in lookup.scalars().all()]

    async def delete_by_document_id(self, document_id: UUID) -> int:
        """Delete all of a document's chunks; returns how many were removed."""
        lookup = await self.session.execute(
            select(DocumentChunkModel).where(DocumentChunkModel.document_id == document_id)
        )
        rows = lookup.scalars().all()

        # Delete through the session (row by row) so ORM events still fire.
        for row in rows:
            await self.session.delete(row)

        await self.session.commit()
        return len(rows)

    def _to_entity(self, model: DocumentChunkModel) -> DocumentChunk:
        """Map an ORM row back onto the domain entity."""
        return DocumentChunk(
            id=model.id,
            document_id=model.document_id,
            chunk_index=model.chunk_index,
            content=model.content,
            token_count=model.token_count,
            vector_id=model.vector_id,
            metadata=model.metadata,
            created_at=model.created_at,
        )
app/main.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
FastAPI Application - Main Entry Point
"""
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

from app.core.config import get_settings
from app.core.logging import get_logger, setup_logging
from app.presentation.api.v1.endpoints import router as api_router

# Settings and structured logging are configured at import time so the
# module-level FastAPI app below can already use them.
settings = get_settings()
setup_logging(settings.log_level)
logger = get_logger(__name__)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager"""
    # Runs once at startup (before `yield`) and once at shutdown (after).
    logger.info("application_startup", version=settings.app_version, env=settings.environment)

    # TODO: Initialize database connection pool
    # TODO: Initialize Qdrant collection
    # TODO: Warm up embedding model

    yield

    # Cleanup
    logger.info("application_shutdown")


app = FastAPI(
    title=settings.app_name,
    version=settings.app_version,
    description="Production-ready RAG backend for corporate employee onboarding",
    lifespan=lifespan,
)

# CORS - origins and credential policy come from settings; methods/headers
# are left wide open.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins,
    allow_credentials=settings.cors_allow_credentials,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(api_router)


@app.get("/")
async def root():
    """Root endpoint"""
    # Lightweight service banner; not a dependency health check.
    return {
        "service": settings.app_name,
        "version": settings.app_version,
        "status": "running",
        "environment": settings.environment,
    }


@app.exception_handler(Exception)
async def global_exception_handler(request, exc):
    """Global exception handler"""
    # Catch-all boundary: logs the failure and returns a generic 500.
    # NOTE(review): `detail` echoes str(exc) to the client, which can leak
    # internals - consider suppressing it outside debug environments.
    logger.error("unhandled_exception", error=str(exc), exc_info=True)
    return JSONResponse(
        status_code=500,
        content={"error": "Internal server error", "detail": str(exc)},
    )


if __name__ == "__main__":
    import uvicorn

    # Dev entry point; in production the ASGI server is launched externally.
    uvicorn.run(
        "app.main:app",
        host=settings.host,
        port=settings.port,
        reload=settings.debug,
        log_level=settings.log_level.lower(),
    )
app/presentation/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Presentation layer"""
app/presentation/api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """API layer"""
app/presentation/api/v1/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """API v1"""
app/presentation/api/v1/endpoints.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Presentation Layer - API Endpoints
3
+ """
4
+ import time
5
+ from datetime import datetime
6
+ from typing import List
7
+ from uuid import uuid4
8
+
9
+ from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
10
+ from sqlalchemy.ext.asyncio import AsyncSession
11
+
12
+ from app.application.dto import DocumentUploadDTO, QueryDTO
13
+ from app.application.services import ChunkingService
14
+ from app.application.use_cases.document_indexing import DocumentIndexingUseCase
15
+ from app.application.use_cases.query_processing import QueryProcessingUseCase
16
+ from app.core.config import get_settings
17
+ from app.core.logging import get_logger, set_correlation_id
18
+ from app.core.metrics import (
19
+ active_requests,
20
+ http_request_duration_seconds,
21
+ http_requests_total,
22
+ queries_total,
23
+ )
24
+ from app.infrastructure.cache.redis_cache import RedisCache
25
+ from app.infrastructure.external.embedder import SentenceTransformerEmbedder
26
+ from app.infrastructure.external.gemini_llm import GeminiLLM
27
+ from app.infrastructure.external.prompt_builder import DefaultPromptBuilder
28
+ from app.infrastructure.external.qdrant_retriever import QdrantRetriever
29
+ from app.infrastructure.repositories.postgres_repository import (
30
+ PostgresChunkRepository,
31
+ PostgresDocumentRepository,
32
+ )
33
+ from app.presentation.api.v1.schemas import (
34
+ DocumentResponse,
35
+ HealthResponse,
36
+ QueryRequest,
37
+ QueryResponse,
38
+ SourceSchema,
39
+ )
40
+
41
+ router = APIRouter(prefix="/api/v1", tags=["api"])
42
+ logger = get_logger(__name__)
43
+ settings = get_settings()
44
+
45
+
# Dependency injection (simplified - in production use proper DI container)
async def get_query_use_case() -> QueryProcessingUseCase:
    """Build a QueryProcessingUseCase wired with concrete infrastructure.

    NOTE: every service is constructed per request for simplicity; a real
    deployment should build these once and serve them from a DI container
    or app state.
    """
    retriever = QdrantRetriever(
        url=settings.qdrant_url,
        collection_name=settings.qdrant_collection_name,
        vector_size=settings.qdrant_vector_size,
        api_key=settings.qdrant_api_key if settings.qdrant_api_key else None,
    )
    llm = GeminiLLM(api_key=settings.gemini_api_key, model_name=settings.gemini_model)
    prompt_builder = DefaultPromptBuilder()
    cache = RedisCache(redis_url=settings.redis_url)

    # For now, using a simple reranker (in production use cross-encoder)
    from app.infrastructure.external.simple_reranker import SimpleReranker

    reranker = SimpleReranker()

    # Fix: the original also built a SentenceTransformerEmbedder here, but the
    # use case never receives it - the dead instantiation loaded an embedding
    # model on every request for nothing, so it has been removed.
    return QueryProcessingUseCase(
        retriever=retriever,
        reranker=reranker,
        llm=llm,
        prompt_builder=prompt_builder,
        cache=cache,
    )
73
+
74
+
75
@router.post("/query", response_model=QueryResponse, status_code=status.HTTP_200_OK)
async def process_query(
    request: QueryRequest,
    use_case: QueryProcessingUseCase = Depends(get_query_use_case),
) -> QueryResponse:
    """Process user query through RAG pipeline"""
    started_at = time.time()
    # Tag every log line for this request with a fresh correlation id.
    set_correlation_id(str(uuid4()))

    active_requests.inc()

    try:
        logger.info("processing_query", query=request.query_text, department=request.department)

        # Translate the API schema into the application-layer DTO.
        query_dto = QueryDTO(
            query_text=request.query_text,
            department=request.department,
            user_id=request.user_id,
            session_id=request.session_id,
            top_k=request.top_k,
            temperature=request.temperature,
            max_tokens=request.max_tokens,
            filters=request.filters,
        )

        # Run the RAG pipeline.
        result = await use_case.execute(query_dto)

        # Translate the DTO back into the API response schema.
        cited_sources = [
            SourceSchema(
                title=src.title,
                content=src.content,
                relevance_score=src.relevance_score,
                document_id=src.document_id,
                chunk_index=src.chunk_index,
                metadata=src.metadata,
            )
            for src in result.sources
        ]
        response = QueryResponse(
            query_id=result.query_id,
            answer=result.answer,
            sources=cited_sources,
            confidence=result.confidence,
            processing_time_ms=result.processing_time_ms,
            tokens_used=result.tokens_used,
            model=result.model,
        )

        # Record success metrics.
        elapsed = time.time() - started_at
        http_requests_total.labels(method="POST", endpoint="/api/v1/query", status="200").inc()
        http_request_duration_seconds.labels(method="POST", endpoint="/api/v1/query").observe(
            elapsed
        )
        queries_total.labels(department=request.department, status="success").inc()

        logger.info("query_processed", query_id=response.query_id, duration_ms=int(elapsed * 1000))

        return response

    except Exception as e:
        # Record failure metrics and surface the error as an HTTP 500.
        logger.error("query_processing_error", error=str(e), exc_info=True)
        http_requests_total.labels(method="POST", endpoint="/api/v1/query", status="500").inc()
        queries_total.labels(department=request.department, status="error").inc()
        raise HTTPException(status_code=500, detail=f"Query processing failed: {str(e)}")

    finally:
        active_requests.dec()
146
+
147
+
148
+ @router.get("/health", response_model=HealthResponse)
149
+ async def health_check() -> HealthResponse:
150
+ """Health check endpoint"""
151
+ return HealthResponse(
152
+ status="healthy",
153
+ version=settings.app_version,
154
+ timestamp=datetime.utcnow(),
155
+ services={
156
+ "database": "unknown", # TODO: Add actual health checks
157
+ "redis": "unknown",
158
+ "qdrant": "unknown",
159
+ },
160
+ )
161
+
162
+
163
+ @router.get("/metrics")
164
+ async def metrics():
165
+ """Prometheus metrics endpoint"""
166
+ from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
167
+
168
+ return generate_latest()
app/presentation/api/v1/schemas.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Presentation Layer - Pydantic Schemas for API
"""
from datetime import datetime
from typing import List, Optional

from pydantic import BaseModel, Field


class QueryRequest(BaseModel):
    """Query request schema"""

    query_text: str = Field(..., min_length=1, max_length=5000, description="User question")
    department: str = Field(..., description="Department context")
    user_id: Optional[str] = Field(None, description="User identifier")
    session_id: Optional[str] = Field(None, description="Session identifier")
    top_k: int = Field(10, ge=1, le=50, description="Number of results to return")
    temperature: float = Field(0.7, ge=0.0, le=1.0, description="LLM temperature")
    max_tokens: int = Field(2048, ge=100, le=4096, description="Max tokens in response")
    filters: dict = Field(default_factory=dict, description="Additional filters")


class SourceSchema(BaseModel):
    """Source citation schema"""

    title: str
    content: str
    # Retrieval score attached to this chunk.
    relevance_score: float
    document_id: str
    # Position of the chunk within its source document.
    chunk_index: int
    metadata: dict = Field(default_factory=dict)


class QueryResponse(BaseModel):
    """Query response schema"""

    query_id: str
    answer: str
    # Citations backing the answer.
    sources: List[SourceSchema]
    confidence: float
    processing_time_ms: int
    tokens_used: int
    model: str


class DocumentUploadRequest(BaseModel):
    """Document upload request schema"""

    department: str = Field(..., description="Department for the document")
    metadata: dict = Field(default_factory=dict, description="Additional metadata")


class DocumentResponse(BaseModel):
    """Document response schema"""

    id: str
    title: str
    filename: str
    file_type: str
    file_size: int
    department: str
    # Indexing lifecycle state (string form of the domain enum).
    status: str
    uploaded_at: datetime
    # None until indexing has completed.
    indexed_at: Optional[datetime] = None
    metadata: dict = Field(default_factory=dict)


class HealthResponse(BaseModel):
    """Health check response"""

    status: str
    version: str
    timestamp: datetime
    # Per-dependency status map (database, redis, qdrant, ...).
    services: dict = Field(default_factory=dict)


class ErrorResponse(BaseModel):
    """Error response schema"""

    error: str
    detail: Optional[str] = None
    request_id: Optional[str] = None
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.109.0
2
+ uvicorn[standard]==0.27.0
3
+ pydantic==2.5.3
4
+ pydantic-settings==2.1.0
5
+ sqlalchemy==2.0.25
6
+ asyncpg==0.29.0
7
+ redis[hiredis]==5.0.1
8
+ qdrant-client==1.7.3
9
+ sentence-transformers==2.3.1
10
+ google-generativeai==0.3.2
11
+ alembic==1.13.1
12
+ celery[redis]==5.3.6
13
+ prometheus-client==0.19.0
14
+ structlog==24.1.0
15
+ tenacity==8.2.3
16
+ httpx==0.26.0
17
+ python-multipart==0.0.6
18
+ aiofiles==23.2.1
19
+ psycopg2-binary==2.9.9