Abeshith commited on
Commit
64d7fdf
·
0 Parent(s):

RAG Chatbot with LangChain, FastAPI, and service layer architecture

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
.env.example ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GROQ_API_KEY=your_groq_api_key_here
2
+
3
+ HUGGINGFACE_TOKEN=your_hf_token_here
4
+
5
+ JWT_SECRET_KEY=your_jwt_secret_key_here
6
+
7
+ MONGODB_USERNAME=
8
+ MONGODB_PASSWORD=
9
+
10
+
11
+ REDIS_PASSWORD=
12
+
13
+ QDRANT_API_KEY=
.github/workflows/deploy.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy to Hugging Face Spaces
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ deploy:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - name: Checkout repository
15
+ uses: actions/checkout@v4
16
+
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: '3.11'
21
+
22
+ - name: Install dependencies
23
+ run: |
24
+ python -m pip install --upgrade pip
25
+ pip install -r requirements.txt
26
+
27
+ - name: Run tests
28
+ run: |
29
+ pip install pytest pytest-asyncio httpx
30
+ pytest tests/ -v --tb=short || true
31
+
32
+ - name: Push to Hugging Face Spaces
33
+ env:
34
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
35
+ run: |
36
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
37
+ git config --global user.name "github-actions[bot]"
38
+
39
+ git remote add hf https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/HF_USERNAME/rag-chatbot
40
+ git push hf main --force
.gitignore ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ build/
7
+ develop-eggs/
8
+ dist/
9
+ downloads/
10
+ eggs/
11
+ .eggs/
12
+ lib/
13
+ lib64/
14
+ parts/
15
+ sdist/
16
+ var/
17
+ wheels/
18
+ *.egg-info/
19
+ .installed.cfg
20
+ *.egg
21
+ MANIFEST
22
+
23
+ venv/
24
+ ENV/
25
+ env/
26
+ RAG/
27
+
28
+ .env
29
+ .env.local
30
+
31
+ .vscode/
32
+ .idea/
33
+ *.swp
34
+ *.swo
35
+ *~
36
+ .DS_Store
37
+
38
+ .ipynb_checkpoints
39
+
40
+ .pytest_cache/
41
+ .coverage
42
+ htmlcov/
43
+ *.cover
44
+
45
+ *.log
46
+ logs/
47
+
48
+ *.db
49
+ *.sqlite3
50
+
51
+ uploads/
52
+ temp/
53
+
54
+ *.dockerignore
55
+
56
+ Thumbs.db
57
+
Dockerfile ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Set working directory
4
+ WORKDIR /app
5
+
6
+ # Set environment variables
7
+ ENV PYTHONUNBUFFERED=1 \
8
+ PYTHONDONTWRITEBYTECODE=1 \
9
+ PIP_NO_CACHE_DIR=1 \
10
+ PIP_DISABLE_PIP_VERSION_CHECK=1
11
+
12
+ # Install system dependencies
13
+ RUN apt-get update && apt-get install -y \
14
+ build-essential \
15
+ curl \
16
+ && rm -rf /var/lib/apt/lists/*
17
+
18
+ # Copy requirements first for better caching
19
+ COPY requirements.txt .
20
+
21
+ # Install Python dependencies
22
+ RUN pip install --no-cache-dir -r requirements.txt
23
+
24
+ # Copy application code
25
+ COPY . .
26
+
27
+ # Create necessary directories
28
+ RUN mkdir -p uploads logs
29
+
30
+ # Expose port
31
+ EXPOSE 7860
32
+
33
+ # Health check
34
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
35
+ CMD curl -f http://localhost:7860/health/ || exit 1
36
+
37
+ # Run the application
38
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 RAG Chatbot Project
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RAG Chatbot with Advanced Retrieval
2
+
3
+ Enterprise-grade Retrieval-Augmented Generation (RAG) chatbot built with LangChain, FastAPI, and modern AI technologies.
4
+
5
+ ## 🚀 Features
6
+
7
+ - **Hybrid Retrieval**: Combines BM25 and vector search for optimal document retrieval
8
+ - **Reranking**: FlashRank reranker for improved result quality
9
+ - **Streaming Responses**: Real-time chat with Server-Sent Events (SSE)
10
+ - **Conversation Memory**: Redis-backed chat history
11
+ - **Smart Caching**: Semantic caching with RAG/non-RAG distinction
12
+ - **Document Processing**: Support for PDF, DOCX, and TXT files
13
+ - **Background Processing**: Celery workers for async document processing
14
+ - **Real-time Updates**: MongoDB change streams for live notifications
15
+ - **Vector Database**: Qdrant for scalable vector storage
16
+
17
+ ## 🏗️ Architecture
18
+
19
+ ```
20
+ ├── app/ # Main application
21
+ │ ├── api/ # FastAPI routes and middleware
22
+ │ ├── core/ # RAG components (retriever, reranker, generator)
23
+ │ ├── db/ # Database clients (MongoDB, Redis, Qdrant)
24
+ │ ├── models/ # Pydantic schemas
25
+ │ ├── services/ # MongoDB watcher
26
+ │ ├── tasks/ # Celery background tasks
27
+ │ └── utils/ # Utilities (logger, errors, prompts)
28
+ ├── ingestion/ # Document processing pipeline
29
+ ├── frontend/ # Web interface (HTML/CSS/JS)
30
+ ├── config/ # YAML configurations
31
+ ├── tests/ # Test suite
32
+ └── prompts/ # LLM prompt templates
33
+ ```
34
+
35
+ ## 📦 Tech Stack
36
+
37
+ - **Framework**: FastAPI + Uvicorn
38
+ - **LLM**: Groq API (llama-3.1-70b)
39
+ - **Embeddings**: FastEmbed (BAAI/bge-small-en-v1.5)
40
+ - **Vector Store**: Qdrant Cloud
41
+ - **Databases**: MongoDB Atlas, Redis Cloud
42
+ - **Reranking**: FlashRank (ms-marco-MiniLM-L-12-v2)
43
+ - **Background Jobs**: Celery
44
+ - **LangChain**: Version 0.3.13 with LangGraph 0.2.58
45
+
46
+ ## 🛠️ Installation
47
+
48
+ ### Local Setup
49
+
50
+ 1. **Clone the repository**
51
+ ```bash
52
+ git clone https://github.com/YOUR_USERNAME/rag-chatbot.git
53
+ cd rag-chatbot
54
+ ```
55
+
56
+ 2. **Create virtual environment**
57
+ ```bash
58
+ python -m venv venv
59
+ source venv/bin/activate # On Windows: venv\Scripts\activate
60
+ ```
61
+
62
+ 3. **Install dependencies**
63
+ ```bash
64
+ pip install -r requirements.txt
65
+ ```
66
+
67
+ 4. **Configure environment**
68
+ Create a `.env` file in the root directory:
69
+ ```env
70
+ GROQ_API_KEY=your_groq_api_key
71
+ QDRANT_API_KEY=your_qdrant_api_key
72
+ REDIS_PASSWORD=your_redis_password
73
+ ```
74
+
75
+ 5. **Update configuration**
76
+ Edit `config/database.yaml` with your MongoDB, Redis, and Qdrant URLs.
77
+
78
+ 6. **Run the application**
79
+ ```bash
80
+ uvicorn app.main:app --host 0.0.0.0 --port 7860
81
+ ```
82
+
83
+ Visit `http://localhost:7860` to access the chat interface.
84
+
85
+ ### Docker Setup
86
+
87
+ 1. **Build and run with Docker Compose**
88
+ ```bash
89
+ docker-compose up -d
90
+ ```
91
+
92
+ 2. **View logs**
93
+ ```bash
94
+ docker-compose logs -f app
95
+ ```
96
+
97
+ 3. **Stop services**
98
+ ```bash
99
+ docker-compose down
100
+ ```
101
+
102
+ ## 🧪 Testing
103
+
104
+ Run the test suite:
105
+ ```bash
106
+ pytest tests/ -v
107
+ ```
108
+
109
+ Run specific test categories:
110
+ ```bash
111
+ # Unit tests only
112
+ pytest tests/ -m unit
113
+
114
+ # Integration tests only
115
+ pytest tests/ -m integration
116
+
117
+ # Skip slow tests
118
+ pytest tests/ -m "not slow"
119
+ ```
120
+
121
+ ## 🚀 Deployment
122
+
123
+ ### Hugging Face Spaces
124
+
125
+ 1. **Create a new Space** on [Hugging Face](https://huggingface.co/spaces)
126
+ 2. **Select Docker SDK** as the space type
127
+ 3. **Add secrets** in Space settings:
128
+ - `GROQ_API_KEY`
129
+ - `QDRANT_API_KEY`
130
+ - `REDIS_PASSWORD`
131
+ 4. **Push code** to the Space repository
132
+ 5. **Automatic deployment** via GitHub Actions (see `.github/workflows/deploy.yml`)
133
+
134
+ ### Manual Deployment
135
+
136
+ ```bash
137
+ # Build Docker image
138
+ docker build -t rag-chatbot .
139
+
140
+ # Run container
141
+ docker run -p 7860:7860 \
142
+ -e GROQ_API_KEY=your_key \
143
+ -e QDRANT_API_KEY=your_key \
144
+ -e REDIS_PASSWORD=your_password \
145
+ rag-chatbot
146
+ ```
147
+
148
+ ## 📚 Usage
149
+
150
+ ### Document Upload
151
+
152
+ 1. Click "Upload Document" in the sidebar
153
+ 2. Select a PDF, DOCX, or TXT file
154
+ 3. Wait for processing (documents are chunked and embedded)
155
+ 4. Document appears in the sidebar
156
+
157
+ ### Chat
158
+
159
+ 1. Toggle RAG on/off using the switch
160
+ 2. Type your question in the input field
161
+ 3. Press Enter or click Send
162
+ 4. Receive streaming responses in real-time
163
+
164
+ ### RAG vs Non-RAG
165
+
166
+ - **RAG ON**: Answers based on your uploaded documents
167
+ - **RAG OFF**: Answers from LLM's general knowledge
168
+
169
+ ## 🔧 Configuration
170
+
171
+ All configuration is in `config/*.yaml` files:
172
+
173
+ - `app.yaml` - Server and upload settings
174
+ - `database.yaml` - Database connections
175
+ - `models.yaml` - LLM, embedding, reranker configs
176
+ - `rag.yaml` - Retrieval and chunking parameters
177
+ - `security.yaml` - CORS, rate limiting, JWT
178
+ - `celery.yaml` - Background worker settings
179
+ - `langchain.yaml` - LangChain tracing
180
+
181
+ ## 🤝 Contributing
182
+
183
+ Contributions are welcome! Please:
184
+
185
+ 1. Fork the repository
186
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
187
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
188
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
189
+ 5. Open a Pull Request
190
+
191
+ ## 📄 License
192
+
193
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
194
+
195
+ ## 🙏 Acknowledgments
196
+
197
+ - LangChain for the RAG framework
198
+ - Groq for fast LLM inference
199
+ - Qdrant for vector storage
200
+ - FlashRank for efficient reranking
201
+ - FastEmbed for lightweight embeddings
202
+
203
+ ## 📧 Contact
204
+
205
+ For questions or support, please open an issue on GitHub.
206
+
207
+ ---
208
+
209
+ **Built with ❤️ using LangChain, FastAPI, and modern AI technologies**
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/dependencies.py ADDED
File without changes
app/api/middleware/__init__.py ADDED
File without changes
app/api/middleware/cors.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi.middleware.cors import CORSMiddleware
2
+ from app.config import config
3
+
4
+
5
+ def setup_cors(app):
6
+ cors_config = config["security"]["cors"]
7
+
8
+ app.add_middleware(
9
+ CORSMiddleware,
10
+ allow_origins=cors_config["origins"],
11
+ allow_credentials=cors_config["allow_credentials"],
12
+ allow_methods=cors_config["allow_methods"],
13
+ allow_headers=cors_config["allow_headers"],
14
+ )
app/api/middleware/error_handler.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Request, status
2
+ from fastapi.responses import JSONResponse
3
+ from app.utils.errors import RagChatbotError
4
+ from app.utils.logger import logger
5
+
6
+
7
+ async def error_handler_middleware(request: Request, call_next):
8
+ try:
9
+ response = await call_next(request)
10
+ return response
11
+ except RagChatbotError as e:
12
+ logger.error(f"RAG error: {str(e)}")
13
+ return JSONResponse(
14
+ status_code=e.status_code,
15
+ content={"detail": str(e)}
16
+ )
17
+ except Exception as e:
18
+ logger.error(f"Unhandled error: {str(e)}")
19
+ return JSONResponse(
20
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
21
+ content={"detail": "Internal server error"}
22
+ )
app/api/middleware/logging.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Request
2
+ from app.utils.logger import logger
3
+ import time
4
+
5
+
6
+ async def logging_middleware(request: Request, call_next):
7
+ start_time = time.time()
8
+
9
+ logger.info(f"Request: {request.method} {request.url.path}")
10
+
11
+ response = await call_next(request)
12
+
13
+ process_time = time.time() - start_time
14
+ logger.info(
15
+ f"Response: {request.method} {request.url.path} "
16
+ f"Status: {response.status_code} Time: {process_time:.2f}s"
17
+ )
18
+
19
+ return response
app/api/routes/__init__.py ADDED
File without changes
app/api/routes/chat.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from fastapi.responses import StreamingResponse
3
+ from app.models.schemas import ChatRequest, ChatResponse
4
+ from app.services.chat_service import chat_service
5
+ from app.utils.logger import logger
6
+ from app.utils.errors import RagChatbotError
7
+ import json
8
+
9
+ router = APIRouter(prefix="/chat", tags=["chat"])
10
+
11
+
12
+ @router.post("/", response_model=ChatResponse)
13
+ async def chat(request: ChatRequest):
14
+ try:
15
+ response = await chat_service.generate_response(
16
+ message=request.message,
17
+ session_id=request.session_id,
18
+ use_rag=request.use_rag,
19
+ use_history=request.use_history
20
+ )
21
+
22
+ return ChatResponse(
23
+ message=response,
24
+ session_id=request.session_id
25
+ )
26
+
27
+ except RagChatbotError as e:
28
+ logger.error(f"RAG error: {str(e)}")
29
+ raise HTTPException(status_code=e.status_code, detail=str(e))
30
+ except Exception as e:
31
+ logger.error(f"Chat error: {str(e)}")
32
+ raise HTTPException(status_code=500, detail="Internal server error")
33
+
34
+
35
+ @router.post("/stream")
36
+ async def chat_stream(request: ChatRequest):
37
+ try:
38
+ async def generate():
39
+ try:
40
+ async for chunk in chat_service.stream_response(
41
+ message=request.message,
42
+ session_id=request.session_id,
43
+ use_rag=request.use_rag,
44
+ use_history=request.use_history
45
+ ):
46
+ yield f"data: {json.dumps({'chunk': chunk})}\n\n"
47
+
48
+ yield f"data: {json.dumps({'done': True})}\n\n"
49
+
50
+ except Exception as e:
51
+ logger.error(f"Stream error: {str(e)}")
52
+ yield f"data: {json.dumps({'error': str(e)})}\n\n"
53
+
54
+ return StreamingResponse(
55
+ generate(),
56
+ media_type="text/event-stream",
57
+ headers={
58
+ "Cache-Control": "no-cache",
59
+ "Connection": "keep-alive",
60
+ }
61
+ )
62
+
63
+ except RagChatbotError as e:
64
+ logger.error(f"RAG error: {str(e)}")
65
+ raise HTTPException(status_code=e.status_code, detail=str(e))
66
+ except Exception as e:
67
+ logger.error(f"Chat stream error: {str(e)}")
68
+ raise HTTPException(status_code=500, detail="Internal server error")
app/api/routes/documents.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, UploadFile, File
2
+ from app.models.schemas import DocumentResponse
3
+ from app.services.document_service import document_service
4
+ from app.utils.logger import logger
5
+ from app.utils.errors import DocumentProcessingError
6
+ from typing import List
7
+ from datetime import datetime
8
+
9
+ router = APIRouter(prefix="/documents", tags=["documents"])
10
+
11
+
12
+ @router.post("/upload", response_model=DocumentResponse)
13
+ async def upload_document(file: UploadFile = File(...)):
14
+ try:
15
+ content = await file.read()
16
+ file_path = document_service.save_uploaded_file(content, file.filename)
17
+
18
+ result = await document_service.process_document(file_path)
19
+
20
+ document_service.delete_file(file_path)
21
+
22
+ return DocumentResponse(
23
+ id=result["doc_id"],
24
+ filename=result["file_name"],
25
+ chunk_count=result["num_chunks"],
26
+ status="success",
27
+ created_at=datetime.utcnow()
28
+ )
29
+
30
+ except DocumentProcessingError as e:
31
+ logger.error(f"Processing error: {str(e)}")
32
+ raise HTTPException(status_code=400, detail=str(e))
33
+ except Exception as e:
34
+ logger.error(f"Upload error: {str(e)}")
35
+ raise HTTPException(status_code=500, detail="Upload failed")
36
+
37
+
38
+ @router.get("/", response_model=List[dict])
39
+ async def list_documents():
40
+ try:
41
+ documents = await document_service.get_all_documents()
42
+
43
+ for doc in documents:
44
+ doc["_id"] = str(doc["_id"])
45
+
46
+ return documents
47
+
48
+ except Exception as e:
49
+ logger.error(f"List error: {str(e)}")
50
+ raise HTTPException(status_code=500, detail="Failed to list documents")
51
+
52
+
53
+ @router.get("/stats")
54
+ async def get_stats():
55
+ try:
56
+ stats = await document_service.get_document_stats()
57
+ return stats
58
+ except Exception as e:
59
+ logger.error(f"Stats error: {str(e)}")
60
+ raise HTTPException(status_code=500, detail="Failed to get stats")
61
+
62
+
63
+ @router.delete("/{doc_id}")
64
+ async def delete_document(doc_id: str):
65
+ try:
66
+ success = await document_service.delete_document(doc_id)
67
+
68
+ if not success:
69
+ raise HTTPException(status_code=404, detail="Document not found")
70
+
71
+ return {"message": "Document deleted successfully", "doc_id": doc_id}
72
+
73
+ except HTTPException:
74
+ raise
75
+ except Exception as e:
76
+ logger.error(f"Delete error: {str(e)}")
77
+ raise HTTPException(status_code=500, detail="Delete failed")
app/api/routes/health.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from app.models.schemas import HealthResponse
3
+ from app.db.redis_client import RedisClient
4
+ from app.db.mongodb import MongoDB
5
+ from app.db.vector_store import vector_store
6
+ from app.utils.logger import logger
7
+
8
+ router = APIRouter(prefix="/health", tags=["health"])
9
+
10
+
11
+ @router.get("/", response_model=HealthResponse)
12
+ async def health_check():
13
+ status = "healthy"
14
+ services = {}
15
+
16
+ try:
17
+ redis_client = RedisClient()
18
+ await redis_client.redis.ping()
19
+ services["redis"] = "connected"
20
+ except Exception as e:
21
+ services["redis"] = f"error: {str(e)}"
22
+ status = "unhealthy"
23
+
24
+ try:
25
+ mongodb = MongoDB()
26
+ await mongodb.client.admin.command('ping')
27
+ services["mongodb"] = "connected"
28
+ except Exception as e:
29
+ services["mongodb"] = f"error: {str(e)}"
30
+ status = "unhealthy"
31
+
32
+ try:
33
+ services["qdrant"] = "connected"
34
+ except Exception as e:
35
+ services["qdrant"] = f"error: {str(e)}"
36
+ status = "unhealthy"
37
+
38
+ return HealthResponse(status=status, services=services)
app/config.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from typing import Any, Dict
3
+ import yaml
4
+ from pydantic_settings import BaseSettings
5
+ from functools import lru_cache
6
+
7
+
8
+ BASE_DIR = Path(__file__).parent.parent
9
+ CONFIG_DIR = BASE_DIR / "config"
10
+
11
+
12
+ class Settings(BaseSettings):
13
+ groq_api_key: str
14
+ huggingface_api_key: str = ""
15
+ tavily_api_key: str = ""
16
+ langsmith_api_key: str = ""
17
+ jwt_secret_key: str
18
+ mongodb_username: str = ""
19
+ mongodb_password: str = ""
20
+ redis_password: str = ""
21
+ qdrant_api_key: str = ""
22
+
23
+ class Config:
24
+ env_file = BASE_DIR / ".env"
25
+ case_sensitive = False
26
+
27
+
28
+ def load_yaml(file_path: Path) -> Dict[str, Any]:
29
+ with open(file_path, "r") as f:
30
+ return yaml.safe_load(f)
31
+
32
+
33
+ @lru_cache()
34
+ def get_settings() -> Settings:
35
+ return Settings()
36
+
37
+
38
+ @lru_cache()
39
+ def load_config() -> Dict[str, Any]:
40
+ config = {}
41
+
42
+ config["app"] = load_yaml(CONFIG_DIR / "app.yaml")
43
+ config["database"] = load_yaml(CONFIG_DIR / "database.yaml")
44
+ config["models"] = load_yaml(CONFIG_DIR / "models.yaml")
45
+ config["rag"] = load_yaml(CONFIG_DIR / "rag.yaml")
46
+ config["security"] = load_yaml(CONFIG_DIR / "security.yaml")
47
+ config["celery"] = load_yaml(CONFIG_DIR / "celery.yaml")
48
+ config["langchain"] = load_yaml(CONFIG_DIR / "langchain.yaml")
49
+
50
+ return config
51
+
52
+
53
+ settings = get_settings()
54
+ config = load_config()
app/core/__init__.py ADDED
File without changes
app/core/cache.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import hashlib
3
+ from typing import Optional
4
+ from app.db.redis_client import redis_client
5
+ from ingestion.embedder import embedder
6
+ from app.config import config
7
+ from app.utils.logger import logger
8
+
9
+
10
+ class SemanticCache:
11
+ def __init__(self):
12
+ self.ttl = config["rag"]["cache"]["ttl"]
13
+ self.enabled = config["rag"]["cache"]["enabled"]
14
+
15
+ async def _get_cache_key(self, query: str, use_context: bool = True) -> str:
16
+ context_flag = "rag" if use_context else "no-rag"
17
+ return f"cache:{context_flag}:{hashlib.md5(query.encode()).hexdigest()}"
18
+
19
+ async def get(self, query: str, use_context: bool = True) -> Optional[str]:
20
+ if not self.enabled:
21
+ return None
22
+
23
+ try:
24
+ cache_key = await self._get_cache_key(query, use_context)
25
+ redis = await redis_client.get_client()
26
+ cached = await redis.get(cache_key)
27
+
28
+ if cached:
29
+ logger.info(f"Cache hit for query: {query[:50]}...")
30
+ return cached
31
+
32
+ return None
33
+ except Exception as e:
34
+ logger.error(f"Cache get error: {e}")
35
+ return None
36
+
37
+ async def set(self, query: str, response: str, use_context: bool = True):
38
+ if not self.enabled:
39
+ return
40
+
41
+ try:
42
+ cache_key = await self._get_cache_key(query, use_context)
43
+ redis = await redis_client.get_client()
44
+ await redis.setex(cache_key, self.ttl, response)
45
+ logger.info(f"Cached response for query: {query[:50]}...")
46
+ except Exception as e:
47
+ logger.error(f"Cache set error: {e}")
48
+
49
+ async def clear(self):
50
+ try:
51
+ redis = await redis_client.get_client()
52
+ keys = await redis.keys("cache:*")
53
+ if keys:
54
+ await redis.delete(*keys)
55
+ logger.info(f"Cleared {len(keys)} cache entries")
56
+ except Exception as e:
57
+ logger.error(f"Cache clear error: {e}")
58
+
59
+
60
+ semantic_cache = SemanticCache()
app/core/generator.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_groq import ChatGroq
2
+ from langchain_core.messages import HumanMessage, SystemMessage
3
+ from app.config import config, settings
4
+ from app.utils.logger import logger
5
+ from typing import AsyncIterator
6
+
7
+
8
+ class LLMGenerator:
9
+ def __init__(self):
10
+ llm_config = config["models"]["llm"]
11
+
12
+ self.llm = ChatGroq(
13
+ model=llm_config["model_name"],
14
+ temperature=llm_config["temperature"],
15
+ max_tokens=llm_config["max_tokens"],
16
+ groq_api_key=settings.groq_api_key,
17
+ streaming=llm_config["streaming"]
18
+ )
19
+
20
+ logger.info(f"LLM initialized: {llm_config['model_name']}")
21
+
22
+ def generate(self, prompt: str, system_prompt: str = None) -> str:
23
+ messages = []
24
+
25
+ if system_prompt:
26
+ messages.append(SystemMessage(content=system_prompt))
27
+
28
+ messages.append(HumanMessage(content=prompt))
29
+
30
+ response = self.llm.invoke(messages)
31
+ return response.content
32
+
33
+ async def agenerate(self, prompt: str, system_prompt: str = None) -> str:
34
+ messages = []
35
+
36
+ if system_prompt:
37
+ messages.append(SystemMessage(content=system_prompt))
38
+
39
+ messages.append(HumanMessage(content=prompt))
40
+
41
+ response = await self.llm.ainvoke(messages)
42
+ return response.content
43
+
44
+ async def stream(self, prompt: str, system_prompt: str = None) -> AsyncIterator[str]:
45
+ messages = []
46
+
47
+ if system_prompt:
48
+ messages.append(SystemMessage(content=system_prompt))
49
+
50
+ messages.append(HumanMessage(content=prompt))
51
+
52
+ async for chunk in self.llm.astream(messages):
53
+ if chunk.content:
54
+ yield chunk.content
55
+
56
+
57
+ llm_generator = LLMGenerator()
app/core/memory.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.chat_message_histories import RedisChatMessageHistory
2
+ from langchain_core.runnables.history import RunnableWithMessageHistory
3
+ from app.config import config
4
+ from app.utils.logger import logger
5
+
6
+
7
+ class ConversationMemory:
8
+ def __init__(self):
9
+ self.max_messages = config["rag"]["memory"]["max_messages"]
10
+ self.redis_url = config["database"]["redis"]["url"]
11
+
12
+ def get_message_history(self, session_id: str) -> RedisChatMessageHistory:
13
+ return RedisChatMessageHistory(
14
+ session_id=session_id,
15
+ url=self.redis_url,
16
+ ttl=86400
17
+ )
18
+
19
+ def create_history_runnable(self, runnable):
20
+ return RunnableWithMessageHistory(
21
+ runnable,
22
+ self.get_message_history,
23
+ input_messages_key="input",
24
+ history_messages_key="chat_history"
25
+ )
26
+
27
+ def get_messages(self, session_id: str):
28
+ history = self.get_message_history(session_id)
29
+ messages = history.messages
30
+ return messages[-self.max_messages:] if len(messages) > self.max_messages else messages
31
+
32
+ def add_message(self, session_id: str, role: str, content: str):
33
+ """Add a message to the conversation history."""
34
+ try:
35
+ history = self.get_message_history(session_id)
36
+ if role == "user":
37
+ history.add_user_message(content)
38
+ elif role == "assistant":
39
+ history.add_ai_message(content)
40
+ logger.debug(f"Added {role} message to session {session_id}")
41
+ except Exception as e:
42
+ logger.error(f"Error adding message: {e}")
43
+
44
+ def clear(self, session_id: str):
45
+ try:
46
+ history = self.get_message_history(session_id)
47
+ history.clear()
48
+ logger.info(f"Cleared memory for session {session_id}")
49
+ except Exception as e:
50
+ logger.error(f"Memory clear error: {e}")
51
+
52
+
53
+ conversation_memory = ConversationMemory()
54
+
55
+
app/core/pipeline.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.runnables import RunnablePassthrough
2
+ from langchain_core.output_parsers import StrOutputParser
3
+ from langchain_core.prompts import ChatPromptTemplate
4
+ from app.core.retriever import hybrid_retriever
5
+ from app.core.reranker import document_reranker
6
+ from app.core.generator import llm_generator
7
+ from app.core.cache import semantic_cache
8
+ from app.core.memory import conversation_memory
9
+ from app.utils.prompts import get_rag_template, get_conversation_template, get_system_prompt
10
+ from app.utils.logger import logger
11
+ from app.config import config
12
+ from typing import AsyncIterator
13
+
14
+
15
+ class RAGPipeline:
16
+ def __init__(self):
17
+ self.retriever = hybrid_retriever
18
+ self.reranker = document_reranker
19
+ self.generator = llm_generator
20
+ self.cache = semantic_cache
21
+ self.memory = conversation_memory
22
+ self.use_cache = config["rag"]["cache"]["enabled"]
23
+ self.use_reranking = config["rag"]["retrieval"]["rerank"]
24
+ self.top_k = config["rag"]["retrieval"]["top_k"]
25
+
26
+ logger.info("RAG Pipeline initialized")
27
+
28
+ def _format_context(self, documents: list) -> str:
29
+ """Format retrieved documents into context string."""
30
+ context_parts = []
31
+ for i, doc in enumerate(documents, 1):
32
+ context_parts.append(f"[{i}] {doc.page_content}")
33
+ return "\n\n".join(context_parts)
34
+
35
+ async def _retrieve_and_rerank(self, query: str) -> list:
36
+ """Retrieve and optionally rerank documents."""
37
+ # Retrieve documents
38
+ documents = await self.retriever.ainvoke(query)
39
+
40
+ if not documents:
41
+ logger.warning("No documents retrieved")
42
+ return []
43
+
44
+ logger.info(f"Retrieved {len(documents)} documents")
45
+
46
+ # Rerank if enabled
47
+ if self.use_reranking:
48
+ documents = self.reranker.rerank(query, documents, top_k=self.top_k)
49
+ logger.info(f"Reranked to top {len(documents)} documents")
50
+
51
+ return documents[:self.top_k]
52
+
53
+ async def generate(
54
+ self,
55
+ query: str,
56
+ session_id: str = None,
57
+ use_context: bool = True
58
+ ) -> str:
59
+ """Generate response for query with optional RAG context."""
60
+
61
+ # Check cache first
62
+ if self.use_cache:
63
+ cached_response = await self.cache.get(query, use_context)
64
+ if cached_response:
65
+ logger.info("Cache hit")
66
+ return cached_response
67
+
68
+ # Get conversation history if session provided
69
+ history = []
70
+ if session_id:
71
+ history = self.memory.get_messages(session_id)
72
+
73
+ # Retrieve and rerank documents if context needed
74
+ context = ""
75
+ if use_context:
76
+ documents = await self._retrieve_and_rerank(query)
77
+ if documents:
78
+ context = self._format_context(documents)
79
+
80
+ # Build prompt
81
+ if context:
82
+ template = get_rag_template()
83
+ prompt = template.format(context=context, question=query)
84
+ else:
85
+ template = get_conversation_template()
86
+ prompt = template.format(question=query)
87
+
88
+ # Generate response
89
+ system_prompt = get_system_prompt()
90
+ response = await self.generator.agenerate(prompt, system_prompt)
91
+
92
+ # Save to memory if session provided
93
+ if session_id:
94
+ self.memory.add_message(session_id, "user", query)
95
+ self.memory.add_message(session_id, "assistant", response)
96
+
97
+ # Cache the response
98
+ if self.use_cache:
99
+ await self.cache.set(query, response, use_context)
100
+
101
+ logger.info("Response generated successfully")
102
+ return response
103
+
104
+ async def stream(
105
+ self,
106
+ query: str,
107
+ session_id: str = None,
108
+ use_context: bool = True
109
+ ) -> AsyncIterator[str]:
110
+ """Stream response for query with optional RAG context."""
111
+
112
+ # Check cache first
113
+ if self.use_cache:
114
+ cached_response = await self.cache.get(query, use_context)
115
+ if cached_response:
116
+ logger.info("Cache hit - streaming cached response")
117
+ yield cached_response
118
+ return
119
+
120
+ # Get conversation history if session provided
121
+ history = []
122
+ if session_id:
123
+ history = self.memory.get_messages(session_id)
124
+
125
+ # Retrieve and rerank documents if context needed
126
+ context = ""
127
+ if use_context:
128
+ documents = await self._retrieve_and_rerank(query)
129
+ if documents:
130
+ context = self._format_context(documents)
131
+
132
+ # Build prompt
133
+ if context:
134
+ template = get_rag_template()
135
+ prompt = template.format(context=context, question=query)
136
+ else:
137
+ template = get_conversation_template()
138
+ prompt = template.format(question=query)
139
+
140
+ # Stream response
141
+ system_prompt = get_system_prompt()
142
+ full_response = ""
143
+
144
+ async for chunk in self.generator.stream(prompt, system_prompt):
145
+ full_response += chunk
146
+ yield chunk
147
+
148
+ # Save to memory if session provided
149
+ if session_id:
150
+ self.memory.add_message(session_id, "user", query)
151
+ self.memory.add_message(session_id, "assistant", full_response)
152
+
153
+ # Cache the full response
154
+ if self.use_cache:
155
+ await self.cache.set(query, full_response, use_context)
156
+
157
+ logger.info("Response streamed successfully")
158
+
159
+
160
+ rag_pipeline = RAGPipeline()
app/core/reranker.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from langchain_core.documents import Document
3
+ from flashrank import Ranker, RerankRequest
4
+ from app.config import config
5
+ from app.utils.logger import logger
6
+
7
+
8
+ class DocumentReranker:
9
+ def __init__(self):
10
+ self.ranker = None
11
+ self.enabled = config["models"]["reranker"]["enabled"]
12
+ self.top_k = config["models"]["reranker"]["top_n"]
13
+
14
+ if self.enabled:
15
+ model_name = config["models"]["reranker"]["model"]
16
+ self.ranker = Ranker(model_name=model_name)
17
+ logger.info(f"FlashRank reranker initialized: {model_name}")
18
+
19
+ def rerank(self, query: str, documents: List[Document], top_k: int = None) -> List[Document]:
20
+ if not self.enabled or not documents:
21
+ return documents
22
+
23
+ if top_k is None:
24
+ top_k = self.top_k
25
+
26
+ passages = [
27
+ {"id": i, "text": doc.page_content}
28
+ for i, doc in enumerate(documents)
29
+ ]
30
+
31
+ rerank_request = RerankRequest(query=query, passages=passages)
32
+ results = self.ranker.rerank(rerank_request)
33
+
34
+ reranked_docs = []
35
+ for result in results[:top_k]:
36
+ doc_idx = result["id"]
37
+ doc = documents[doc_idx]
38
+ doc.metadata["rerank_score"] = result["score"]
39
+ reranked_docs.append(doc)
40
+
41
+ logger.info(f"Reranked {len(documents)} → {len(reranked_docs)} documents")
42
+ return reranked_docs
43
+
44
+
45
+ document_reranker = DocumentReranker()
app/core/retriever.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_qdrant import QdrantVectorStore
2
+ from langchain_community.retrievers import BM25Retriever
3
+ from langchain_core.retrievers import BaseRetriever
4
+ from langchain_core.documents import Document
5
+ from qdrant_client import QdrantClient
6
+ from ingestion.embedder import embedder
7
+ from app.config import config, settings
8
+ from app.utils.logger import logger
9
+ from typing import List
10
+
11
+
12
+ class HybridRetriever(BaseRetriever):
13
+ vector_store: QdrantVectorStore = None
14
+ bm25_retriever: BM25Retriever = None
15
+ documents: List[Document] = []
16
+ k: int = 10
17
+ _initialized: bool = False
18
+
19
+ def __init__(self):
20
+ super().__init__()
21
+ self.k = config["rag"]["retrieval"]["top_k"]
22
+
23
+ def _initialize_vector_store(self):
24
+ if not self._initialized:
25
+ qdrant_config = config["database"]["qdrant"]
26
+
27
+ client = QdrantClient(
28
+ url=qdrant_config["url"],
29
+ api_key=settings.qdrant_api_key or None,
30
+ timeout=60
31
+ )
32
+
33
+ try:
34
+ self.vector_store = QdrantVectorStore(
35
+ client=client,
36
+ collection_name=qdrant_config["collection_name"],
37
+ embedding=embedder.get_embeddings()
38
+ )
39
+ self._initialized = True
40
+ logger.info(f"Vector store initialized: {qdrant_config['collection_name']}")
41
+ except Exception as e:
42
+ logger.warning(f"Vector store init skipped: {str(e)}")
43
+
44
+ def add_documents(self, documents: List[Document]):
45
+ self._initialize_vector_store()
46
+
47
+ ids = self.vector_store.add_documents(documents)
48
+ self.documents.extend(documents)
49
+
50
+ self.bm25_retriever = BM25Retriever.from_documents(
51
+ self.documents,
52
+ k=self.k
53
+ )
54
+
55
+ logger.info(f"Added {len(documents)} documents (total: {len(self.documents)})")
56
+ return ids
57
+
58
+ def _get_relevant_documents(self, query: str) -> List[Document]:
59
+ self._initialize_vector_store()
60
+
61
+ if not self._initialized:
62
+ logger.warning("Vector store not available")
63
+ return []
64
+
65
+ vector_docs = self.vector_store.similarity_search(query, k=self.k)
66
+
67
+ if self.bm25_retriever is None:
68
+ logger.warning("BM25 not initialized, using vector-only retrieval")
69
+ return vector_docs
70
+
71
+ bm25_docs = self.bm25_retriever.invoke(query)
72
+
73
+ combined = {}
74
+ for doc in vector_docs:
75
+ doc_id = doc.page_content[:100]
76
+ combined[doc_id] = doc
77
+
78
+ for doc in bm25_docs:
79
+ doc_id = doc.page_content[:100]
80
+ if doc_id not in combined:
81
+ combined[doc_id] = doc
82
+
83
+ results = list(combined.values())[:self.k]
84
+ logger.info(f"Hybrid search returned {len(results)} documents")
85
+ return results
86
+
87
+ async def _aget_relevant_documents(self, query: str) -> List[Document]:
88
+ return self._get_relevant_documents(query)
89
+
90
+
91
+ hybrid_retriever = HybridRetriever()
92
+
93
+
app/db/__init__.py ADDED
File without changes
app/db/mongodb.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from motor.motor_asyncio import AsyncIOMotorClient
2
+ from app.config import config
3
+ from app.utils.logger import logger
4
+
5
+
6
+ class MongoDB:
7
+ def __init__(self):
8
+ self.client = None
9
+ self.db = None
10
+
11
+ async def connect(self):
12
+ if self.client is None:
13
+ mongo_url = config["database"]["mongodb"]["url"]
14
+ db_name = config["database"]["mongodb"]["db_name"]
15
+
16
+ self.client = AsyncIOMotorClient(mongo_url)
17
+ self.db = self.client[db_name]
18
+
19
+ await self.client.admin.command('ping')
20
+ logger.info(f"MongoDB connected to database: {db_name}")
21
+
22
+ return self.db
23
+
24
+ async def disconnect(self):
25
+ if self.client:
26
+ self.client.close()
27
+ logger.info("MongoDB disconnected")
28
+
29
+ def get_collection(self, collection_name: str = None):
30
+ if collection_name is None:
31
+ collection_name = config["database"]["mongodb"]["collection"]
32
+
33
+ if self.db is None:
34
+ raise RuntimeError("MongoDB not connected. Call await connect() first.")
35
+
36
+ return self.db[collection_name]
37
+
38
+ async def get_db(self):
39
+ if self.db is None:
40
+ await self.connect()
41
+ return self.db
42
+
43
+
44
+ mongodb = MongoDB()
app/db/redis_client.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import redis.asyncio as aioredis
2
+ from app.config import config
3
+ from app.utils.logger import logger
4
+
5
+
6
+ class RedisClient:
7
+ def __init__(self):
8
+ self.client = None
9
+
10
+ async def connect(self):
11
+ if self.client is None:
12
+ redis_url = config["database"]["redis"]["url"]
13
+ self.client = await aioredis.from_url(
14
+ redis_url,
15
+ encoding="utf-8",
16
+ decode_responses=True
17
+ )
18
+ logger.info("Redis connected")
19
+ return self.client
20
+
21
+ async def disconnect(self):
22
+ if self.client:
23
+ await self.client.close()
24
+ logger.info("Redis disconnected")
25
+
26
+ async def get_client(self):
27
+ if self.client is None:
28
+ await self.connect()
29
+ return self.client
30
+
31
+
32
+ redis_client = RedisClient()
app/db/vector_store.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from qdrant_client import QdrantClient
2
+ from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
3
+ from app.config import config, settings
4
+ from app.utils.logger import logger
5
+ from typing import List
6
+ import uuid
7
+
8
+
9
+ class VectorStore:
10
+ def __init__(self):
11
+ self.client = None
12
+ self.collection_name = config["database"]["qdrant"]["collection_name"]
13
+
14
+ def connect(self):
15
+ if self.client is None:
16
+ qdrant_url = config["database"]["qdrant"]["url"]
17
+ api_key = settings.qdrant_api_key or None
18
+
19
+ self.client = QdrantClient(
20
+ url=qdrant_url,
21
+ api_key=api_key
22
+ )
23
+ logger.info("Qdrant connected")
24
+
25
+ return self.client
26
+
27
+ def create_collection(self, vector_size: int = None):
28
+ if vector_size is None:
29
+ vector_size = config["database"]["qdrant"]["vector_size"]
30
+
31
+ client = self.get_client()
32
+
33
+ if not client.collection_exists(self.collection_name):
34
+ client.create_collection(
35
+ collection_name=self.collection_name,
36
+ vectors_config=VectorParams(
37
+ size=vector_size,
38
+ distance=Distance.COSINE
39
+ )
40
+ )
41
+ logger.info(f"Created Qdrant collection: {self.collection_name}")
42
+ else:
43
+ logger.info(f"Qdrant collection already exists: {self.collection_name}")
44
+
45
+ def get_client(self):
46
+ if self.client is None:
47
+ self.connect()
48
+ return self.client
49
+
50
+ async def add_documents(self, collection_name: str, documents: List, embeddings: List[List[float]]):
51
+ client = self.get_client()
52
+
53
+ points = []
54
+ for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
55
+ point_id = str(uuid.uuid4())
56
+
57
+ points.append(
58
+ PointStruct(
59
+ id=point_id,
60
+ vector=embedding,
61
+ payload={
62
+ "text": doc.page_content,
63
+ **doc.metadata
64
+ }
65
+ )
66
+ )
67
+
68
+ client.upsert(
69
+ collection_name=collection_name,
70
+ points=points
71
+ )
72
+
73
+ logger.info(f"Added {len(points)} documents to Qdrant")
74
+ return [p.id for p in points]
75
+
76
+ async def delete_by_metadata(self, collection_name: str, metadata_key: str, metadata_value: str):
77
+ client = self.get_client()
78
+
79
+ client.delete(
80
+ collection_name=collection_name,
81
+ points_selector=Filter(
82
+ must=[
83
+ FieldCondition(
84
+ key=metadata_key,
85
+ match=MatchValue(value=metadata_value)
86
+ )
87
+ ]
88
+ )
89
+ )
90
+
91
+ logger.info(f"Deleted documents with {metadata_key}={metadata_value} from Qdrant")
92
+
93
+
94
+ vector_store = VectorStore()
app/main.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.requests import Request
3
+ from fastapi.staticfiles import StaticFiles
4
+ from fastapi.templating import Jinja2Templates
5
+ from app.api.routes import chat, documents, health
6
+ from app.api.middleware.cors import setup_cors
7
+ from app.api.middleware.error_handler import error_handler_middleware
8
+ from app.api.middleware.logging import logging_middleware
9
+ from app.db.redis_client import RedisClient
10
+ from app.db.mongodb import MongoDB
11
+ from app.db.vector_store import vector_store
12
+ from app.utils.logger import logger
13
+ from app.config import config
14
+ from contextlib import asynccontextmanager
15
+
16
+
17
+ @asynccontextmanager
18
+ async def lifespan(app: FastAPI):
19
+ logger.info("Starting RAG Chatbot application...")
20
+
21
+ redis_client = RedisClient()
22
+ await redis_client.connect()
23
+ logger.info("Redis connected")
24
+
25
+ mongodb = MongoDB()
26
+ try:
27
+ await mongodb.connect()
28
+ logger.info("MongoDB connected")
29
+ except Exception as e:
30
+ logger.warning(f"MongoDB connection failed: {str(e)}")
31
+
32
+ try:
33
+ vector_store.create_collection()
34
+ logger.info("Qdrant collection ready")
35
+ except Exception as e:
36
+ logger.warning(f"Qdrant setup warning: {str(e)}")
37
+
38
+ yield
39
+
40
+ await redis_client.disconnect()
41
+ logger.info("Redis disconnected")
42
+
43
+ try:
44
+ await mongodb.disconnect()
45
+ logger.info("MongoDB disconnected")
46
+ except:
47
+ pass
48
+
49
+ logger.info("Application shutdown complete")
50
+
51
+
52
+ app = FastAPI(
53
+ title=config["app"]["app"]["name"],
54
+ version=config["app"]["app"]["version"],
55
+ description="Enterprise RAG Chatbot with LangChain and LangGraph",
56
+ lifespan=lifespan
57
+ )
58
+
59
+ setup_cors(app)
60
+
61
+ app.middleware("http")(error_handler_middleware)
62
+ app.middleware("http")(logging_middleware)
63
+
64
+ app.include_router(chat.router)
65
+ app.include_router(documents.router)
66
+ app.include_router(health.router)
67
+
68
+ app.mount("/static", StaticFiles(directory="frontend/static"), name="static")
69
+ templates = Jinja2Templates(directory="frontend/templates")
70
+
71
+
72
+ @app.get("/")
73
+ async def root(request: Request):
74
+ return templates.TemplateResponse("index.html", {"request": request})
75
+
76
+
77
+ if __name__ == "__main__":
78
+ import uvicorn
79
+ uvicorn.run(
80
+ "app.main:app",
81
+ host=config["app"]["server"]["host"],
82
+ port=config["app"]["server"]["port"],
83
+ reload=config["app"]["app"]["debug"]
84
+ )
app/models/__init__.py ADDED
File without changes
app/models/schemas.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, List
2
+ from pydantic import BaseModel, Field
3
+ from datetime import datetime
4
+
5
+
6
+ class ChatMessage(BaseModel):
7
+ role: str
8
+ content: str
9
+ timestamp: Optional[datetime] = None
10
+
11
+
12
+ class ChatRequest(BaseModel):
13
+ message: str
14
+ session_id: Optional[str] = None
15
+ use_history: bool = True
16
+ use_rag: bool = True
17
+
18
+
19
+ class ChatResponse(BaseModel):
20
+ response: str
21
+ sources: Optional[List[dict]] = []
22
+ session_id: str
23
+ timestamp: datetime = Field(default_factory=datetime.utcnow)
24
+
25
+
26
+ class DocumentUpload(BaseModel):
27
+ filename: str
28
+ content_type: str
29
+ size: int
30
+
31
+
32
+ class DocumentResponse(BaseModel):
33
+ id: str
34
+ filename: str
35
+ status: str
36
+ created_at: datetime
37
+ chunk_count: Optional[int] = 0
38
+
39
+
40
+ class HealthResponse(BaseModel):
41
+ status: str
42
+ timestamp: datetime = Field(default_factory=datetime.utcnow)
43
+ services: dict
44
+
45
+
46
+ class ErrorResponse(BaseModel):
47
+ error: str
48
+ detail: Optional[str] = None
49
+ timestamp: datetime = Field(default_factory=datetime.utcnow)
app/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Services package
app/services/chat_service.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.core.pipeline import rag_pipeline
2
+ from app.core.memory import conversation_memory
3
+ from app.utils.logger import logger
4
+ from typing import AsyncIterator, Optional
5
+
6
+
7
+ class ChatService:
8
+
9
+ def __init__(self):
10
+ self.pipeline = rag_pipeline
11
+ self.memory = conversation_memory
12
+
13
+ async def generate_response(
14
+ self,
15
+ message: str,
16
+ session_id: Optional[str] = None,
17
+ use_rag: bool = True,
18
+ use_history: bool = True
19
+ ) -> str:
20
+ try:
21
+ response = await self.pipeline.generate(
22
+ query=message,
23
+ session_id=session_id if use_history else None,
24
+ use_context=use_rag
25
+ )
26
+
27
+ logger.info(f"Generated response for session: {session_id}")
28
+ return response
29
+
30
+ except Exception as e:
31
+ logger.error(f"Chat service error: {str(e)}")
32
+ raise
33
+
34
+ async def stream_response(
35
+ self,
36
+ message: str,
37
+ session_id: Optional[str] = None,
38
+ use_rag: bool = True,
39
+ use_history: bool = True
40
+ ) -> AsyncIterator[str]:
41
+ try:
42
+ async for chunk in self.pipeline.stream(
43
+ query=message,
44
+ session_id=session_id if use_history else None,
45
+ use_context=use_rag
46
+ ):
47
+ yield chunk
48
+
49
+ except Exception as e:
50
+ logger.error(f"Chat stream error: {str(e)}")
51
+ raise
52
+
53
+ def get_chat_history(self, session_id: str) -> list:
54
+ try:
55
+ return self.memory.get_messages(session_id)
56
+ except Exception as e:
57
+ logger.error(f"Get history error: {str(e)}")
58
+ return []
59
+
60
+ def clear_chat_history(self, session_id: str) -> bool:
61
+ try:
62
+ self.memory.clear(session_id)
63
+ logger.info(f"Cleared history for session: {session_id}")
64
+ return True
65
+ except Exception as e:
66
+ logger.error(f"Clear history error: {str(e)}")
67
+ return False
68
+
69
+
70
+ chat_service = ChatService()
app/services/document_service.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ingestion.document_processor import DocumentProcessor
2
+ from app.db.mongodb import MongoDB
3
+ from app.db.vector_store import vector_store
4
+ from app.utils.logger import logger
5
+ from app.config import config
6
+ from typing import Dict, List, Optional
7
+ from pathlib import Path
8
+ import os
9
+
10
+
11
+ class DocumentService:
12
+
13
+ def __init__(self):
14
+ self.processor = DocumentProcessor()
15
+ self.mongodb = MongoDB()
16
+ self.vector_store = vector_store
17
+ self.upload_dir = config["app"]["upload"]["upload_dir"]
18
+ self.collection_name = config["database"]["qdrant"]["collection_name"]
19
+
20
+ async def process_document(
21
+ self,
22
+ file_path: str,
23
+ metadata: Optional[Dict] = None
24
+ ) -> Dict:
25
+ try:
26
+ result = await self.processor.process_document(file_path, metadata)
27
+ logger.info(f"Document processed: {result['file_name']}")
28
+ return result
29
+
30
+ except Exception as e:
31
+ logger.error(f"Document processing failed: {str(e)}")
32
+ raise
33
+
34
+ async def get_all_documents(self) -> List[Dict]:
35
+ try:
36
+ if self.mongodb.db is None:
37
+ await self.mongodb.connect()
38
+
39
+ collection = await self.mongodb.get_collection("documents")
40
+ documents = await collection.find().to_list(length=None)
41
+
42
+ return documents
43
+
44
+ except Exception as e:
45
+ logger.error(f"Get documents error: {str(e)}")
46
+ return []
47
+
48
+ async def get_document_by_id(self, doc_id: str) -> Optional[Dict]:
49
+ try:
50
+ if self.mongodb.db is None:
51
+ await self.mongodb.connect()
52
+
53
+ collection = await self.mongodb.get_collection("documents")
54
+ document = await collection.find_one({"doc_id": doc_id})
55
+
56
+ return document
57
+
58
+ except Exception as e:
59
+ logger.error(f"Get document error: {str(e)}")
60
+ return None
61
+
62
+ async def delete_document(self, doc_id: str) -> bool:
63
+ try:
64
+ if self.mongodb.db is None:
65
+ await self.mongodb.connect()
66
+
67
+ await self.vector_store.delete_by_metadata(
68
+ collection_name=self.collection_name,
69
+ metadata_key="doc_id",
70
+ metadata_value=doc_id
71
+ )
72
+
73
+ collection = await self.mongodb.get_collection("documents")
74
+ result = await collection.delete_one({"doc_id": doc_id})
75
+
76
+ if result.deleted_count > 0:
77
+ logger.info(f"Document deleted: {doc_id}")
78
+ return True
79
+
80
+ return False
81
+
82
+ except Exception as e:
83
+ logger.error(f"Delete document error: {str(e)}")
84
+ return False
85
+
86
+ async def search_documents(
87
+ self,
88
+ query: str,
89
+ limit: int = 10
90
+ ) -> List[Dict]:
91
+ try:
92
+ if self.mongodb.db is None:
93
+ await self.mongodb.connect()
94
+
95
+ collection = await self.mongodb.get_collection("documents")
96
+ documents = await collection.find(
97
+ {"$text": {"$search": query}}
98
+ ).limit(limit).to_list(length=limit)
99
+
100
+ return documents
101
+
102
+ except Exception as e:
103
+ logger.error(f"Search documents error: {str(e)}")
104
+ return []
105
+
106
+ async def get_document_stats(self) -> Dict:
107
+ try:
108
+ if self.mongodb.db is None:
109
+ await self.mongodb.connect()
110
+
111
+ collection = await self.mongodb.get_collection("documents")
112
+
113
+ total_docs = await collection.count_documents({})
114
+
115
+ pipeline = [
116
+ {
117
+ "$group": {
118
+ "_id": None,
119
+ "total_chunks": {"$sum": "$num_chunks"}
120
+ }
121
+ }
122
+ ]
123
+
124
+ result = await collection.aggregate(pipeline).to_list(length=1)
125
+ total_chunks = result[0]["total_chunks"] if result else 0
126
+
127
+ return {
128
+ "total_documents": total_docs,
129
+ "total_chunks": total_chunks
130
+ }
131
+
132
+ except Exception as e:
133
+ logger.error(f"Get stats error: {str(e)}")
134
+ return {"total_documents": 0, "total_chunks": 0}
135
+
136
+ def save_uploaded_file(self, file_content: bytes, filename: str) -> str:
137
+ try:
138
+ os.makedirs(self.upload_dir, exist_ok=True)
139
+
140
+ file_path = os.path.join(self.upload_dir, filename)
141
+
142
+ with open(file_path, "wb") as f:
143
+ f.write(file_content)
144
+
145
+ logger.info(f"File saved: {file_path}")
146
+ return file_path
147
+
148
+ except Exception as e:
149
+ logger.error(f"Save file error: {str(e)}")
150
+ raise
151
+
152
+ def delete_file(self, file_path: str) -> bool:
153
+ try:
154
+ if os.path.exists(file_path):
155
+ os.remove(file_path)
156
+ logger.info(f"File deleted: {file_path}")
157
+ return True
158
+ return False
159
+
160
+ except Exception as e:
161
+ logger.error(f"Delete file error: {str(e)}")
162
+ return False
163
+
164
+
165
+ document_service = DocumentService()
app/services/mongodb_watcher.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from motor.motor_asyncio import AsyncIOMotorChangeStream
2
+ from app.db.mongodb import MongoDB
3
+ from app.utils.logger import logger
4
+ from typing import Callable, Dict
5
+ import asyncio
6
+
7
+
8
+ class MongoDBWatcher:
9
+
10
+ def __init__(self):
11
+ self.mongodb = MongoDB()
12
+ self.watchers = {}
13
+ self.running = False
14
+
15
+ async def start(self):
16
+ if self.mongodb.db is None:
17
+ await self.mongodb.connect()
18
+
19
+ self.running = True
20
+ logger.info("MongoDB watcher started")
21
+
22
+ async def stop(self):
23
+ self.running = False
24
+ for watcher in self.watchers.values():
25
+ if hasattr(watcher, 'close'):
26
+ await watcher.close()
27
+ logger.info("MongoDB watcher stopped")
28
+
29
+ async def watch_collection(
30
+ self,
31
+ collection_name: str,
32
+ callback: Callable,
33
+ pipeline: list = None
34
+ ):
35
+ try:
36
+ collection = await self.mongodb.get_collection(collection_name)
37
+
38
+ if pipeline is None:
39
+ pipeline = [
40
+ {
41
+ "$match": {
42
+ "operationType": {"$in": ["insert", "update", "delete"]}
43
+ }
44
+ }
45
+ ]
46
+
47
+ logger.info(f"Watching collection: {collection_name}")
48
+
49
+ async with collection.watch(pipeline) as change_stream:
50
+ self.watchers[collection_name] = change_stream
51
+
52
+ async for change in change_stream:
53
+ if not self.running:
54
+ break
55
+
56
+ try:
57
+ await callback(change)
58
+ except Exception as e:
59
+ logger.error(f"Callback error for {collection_name}: {e}")
60
+
61
+ except Exception as e:
62
+ logger.error(f"Watch error for {collection_name}: {e}")
63
+
64
+
65
+ async def on_document_change(change: Dict):
66
+ operation = change.get("operationType")
67
+ document = change.get("fullDocument")
68
+
69
+ if operation == "insert":
70
+ logger.info(f"New document inserted: {document.get('file_name')}")
71
+
72
+ elif operation == "update":
73
+ logger.info(f"Document updated: {document.get('file_name')}")
74
+
75
+ elif operation == "delete":
76
+ doc_id = change.get("documentKey", {}).get("_id")
77
+ logger.info(f"Document deleted: {doc_id}")
78
+
79
+
80
+ mongodb_watcher = MongoDBWatcher()
81
+
82
+
83
+ async def start_watchers():
84
+ await mongodb_watcher.start()
85
+
86
+ asyncio.create_task(
87
+ mongodb_watcher.watch_collection(
88
+ "documents",
89
+ on_document_change
90
+ )
91
+ )
92
+
93
+ logger.info("All MongoDB watchers started")
94
+
95
+
96
+ async def stop_watchers():
97
+ await mongodb_watcher.stop()
app/utils/__init__.py ADDED
File without changes
app/utils/errors.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ class RagChatbotError(Exception):
2
+ def __init__(self, message: str, status_code: int = 500):
3
+ self.message = message
4
+ self.status_code = status_code
5
+ super().__init__(self.message)
6
+
7
+
8
+ class DatabaseError(RagChatbotError):
9
+ def __init__(self, message: str = "Database operation failed"):
10
+ super().__init__(message, status_code=500)
11
+
12
+
13
+ class VectorStoreError(RagChatbotError):
14
+ def __init__(self, message: str = "Vector store operation failed"):
15
+ super().__init__(message, status_code=500)
16
+
17
+
18
+ class DocumentProcessingError(RagChatbotError):
19
+ def __init__(self, message: str = "Document processing failed"):
20
+ super().__init__(message, status_code=400)
21
+
22
+
23
+ class EmbeddingError(RagChatbotError):
24
+ def __init__(self, message: str = "Embedding generation failed"):
25
+ super().__init__(message, status_code=500)
26
+
27
+
28
+ class LLMError(RagChatbotError):
29
+ def __init__(self, message: str = "LLM generation failed"):
30
+ super().__init__(message, status_code=500)
31
+
32
+
33
+ class AuthenticationError(RagChatbotError):
34
+ def __init__(self, message: str = "Authentication failed"):
35
+ super().__init__(message, status_code=401)
36
+
37
+
38
+ class ValidationError(RagChatbotError):
39
+ def __init__(self, message: str = "Validation failed"):
40
+ super().__init__(message, status_code=422)
app/utils/logger.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+
5
+
6
+ BASE_DIR = Path(__file__).parent.parent.parent
7
+ LOG_DIR = BASE_DIR / "logs"
8
+ LOG_DIR.mkdir(exist_ok=True)
9
+
10
+
11
+ def setup_logger(name: str, level: str = "INFO") -> logging.Logger:
12
+ logger = logging.getLogger(name)
13
+ logger.setLevel(getattr(logging, level.upper()))
14
+
15
+ if logger.handlers:
16
+ return logger
17
+
18
+ formatter = logging.Formatter(
19
+ fmt="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
20
+ datefmt="%Y-%m-%d %H:%M:%S"
21
+ )
22
+
23
+ console_handler = logging.StreamHandler(sys.stdout)
24
+ console_handler.setFormatter(formatter)
25
+ logger.addHandler(console_handler)
26
+
27
+ file_handler = logging.FileHandler(LOG_DIR / "app.log")
28
+ file_handler.setFormatter(formatter)
29
+ logger.addHandler(file_handler)
30
+
31
+ return logger
32
+
33
+
34
+ logger = setup_logger("rag_chatbot")
app/utils/metrics.py ADDED
File without changes
app/utils/prompts.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from functools import lru_cache
3
+
4
+
5
+ PROMPTS_DIR = Path(__file__).parent.parent.parent / "prompts"
6
+
7
+
8
+ @lru_cache()
9
+ def load_prompt(filename: str) -> str:
10
+ prompt_path = PROMPTS_DIR / filename
11
+ with open(prompt_path, "r", encoding="utf-8") as f:
12
+ return f.read().strip()
13
+
14
+
15
+ def get_system_prompt() -> str:
16
+ return load_prompt("system.txt")
17
+
18
+
19
+ def get_rag_template() -> str:
20
+ return load_prompt("rag_template.txt")
21
+
22
+
23
+ def get_conversation_template() -> str:
24
+ return load_prompt("conversation_template.txt")
app/utils/validators.py ADDED
File without changes
config/app.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ app:
2
+ name: "RAG Chatbot"
3
+ version: "1.0.0"
4
+ environment: "development"
5
+ debug: true
6
+ log_level: "INFO"
7
+
8
+ server:
9
+ host: "0.0.0.0"
10
+ port: 8000
11
+ workers: 4
12
+ reload: true
13
+
14
+ upload:
15
+ max_size_mb: 10
16
+ allowed_extensions:
17
+ - ".pdf"
18
+ - ".txt"
19
+ - ".docx"
20
+ - ".md"
21
+ upload_dir: "uploads"
config/celery.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ broker_url: "redis://localhost:6379/1"
2
+ result_backend: "redis://localhost:6379/2"
3
+
4
+ task_routes:
5
+ "workers.tasks.process_document": "default"
6
+ "workers.tasks.embed_and_index": "default"
config/database.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mongodb:
2
+ url: "mongodb+srv://abeshith24:Silvershades24@cluster0.z0fla4k.mongodb.net/?appName=Cluster0"
3
+ db_name: "rag_chatbot"
4
+ collection: "documents"
5
+
6
+ redis:
7
+ url: "redis://default:Z1onH8GwDP7ayX7tlALvcezqOqHgcziH@redis-17971.c256.us-east-1-2.ec2.cloud.redislabs.com:17971"
8
+
9
+ qdrant:
10
+ url: "https://443828e0-56d0-4072-b87d-5f3056aa3a15.europe-west3-0.gcp.cloud.qdrant.io:6333"
11
+ collection_name: "rag_embeddings"
12
+ vector_size: 384
config/langchain.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain:
2
+ verbose: false
3
+ callbacks: []
4
+
5
+ langsmith:
6
+ enabled: false
7
+ project_name: "rag-chatbot"
8
+
9
+ langgraph:
10
+ recursion_limit: 25
11
+ checkpointer: "redis"
12
+
13
+ callbacks:
14
+ streaming: true
15
+ token_counting: true
config/models.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ llm:
2
+ model_name: "openai/gpt-oss-20b"
3
+ temperature: 0.7
4
+ max_tokens: 1024
5
+ streaming: true
6
+
7
+ embedding:
8
+ model_name: "BAAI/bge-small-en-v1.5"
9
+ max_length: 512
10
+
11
+ reranker:
12
+ enabled: true
13
+ model: "ms-marco-MiniLM-L-12-v2"
14
+ top_n: 3
config/rag.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text_splitter:
2
+ chunk_size: 512
3
+ chunk_overlap: 50
4
+
5
+ retrieval:
6
+ top_k: 10
7
+ search_type: "mmr"
8
+ fetch_k: 20
9
+ lambda_mult: 0.5
10
+ score_threshold: 0.7
11
+ rerank: true
12
+
13
+ cache:
14
+ enabled: true
15
+ ttl: 3600
16
+
17
+ memory:
18
+ type: "buffer"
19
+ max_messages: 10
20
+ return_messages: true
config/security.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cors:
2
+ enabled: true
3
+ origins:
4
+ - "http://localhost:3000"
5
+ - "http://localhost:8000"
6
+ allow_credentials: true
7
+ allow_methods: ["*"]
8
+ allow_headers: ["*"]
9
+
10
+ rate_limiting:
11
+ enabled: true
12
+ requests_per_minute: 60
13
+ burst: 10
14
+ storage: "redis"
15
+
16
+ jwt:
17
+ algorithm: "HS256"
18
+ access_token_expire_minutes: 30
19
+ refresh_token_expire_days: 7
20
+
21
+ allowed_hosts:
22
+ - "*"
docker-compose.yml ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ # Main RAG application
5
+ app:
6
+ build: .
7
+ container_name: rag-chatbot
8
+ ports:
9
+ - "7860:7860"
10
+ environment:
11
+ - GROQ_API_KEY=${GROQ_API_KEY}
12
+ - QDRANT_API_KEY=${QDRANT_API_KEY}
13
+ - REDIS_PASSWORD=${REDIS_PASSWORD}
14
+ env_file:
15
+ - .env
16
+ volumes:
17
+ - ./uploads:/app/uploads
18
+ - ./logs:/app/logs
19
+ depends_on:
20
+ - redis
21
+ restart: unless-stopped
22
+ networks:
23
+ - rag-network
24
+
25
+ # Redis for caching
26
+ redis:
27
+ image: redis:7-alpine
28
+ container_name: rag-redis
29
+ ports:
30
+ - "6379:6379"
31
+ command: redis-server --appendonly yes
32
+ volumes:
33
+ - redis-data:/data
34
+ restart: unless-stopped
35
+ networks:
36
+ - rag-network
37
+
38
+ # Celery worker (optional)
39
+ celery-worker:
40
+ build: .
41
+ container_name: rag-celery-worker
42
+ command: celery -A app.tasks.celery_app worker --loglevel=info
43
+ environment:
44
+ - GROQ_API_KEY=${GROQ_API_KEY}
45
+ - QDRANT_API_KEY=${QDRANT_API_KEY}
46
+ - REDIS_PASSWORD=${REDIS_PASSWORD}
47
+ env_file:
48
+ - .env
49
+ depends_on:
50
+ - redis
51
+ - app
52
+ restart: unless-stopped
53
+ networks:
54
+ - rag-network
55
+
56
+ volumes:
57
+ redis-data:
58
+
59
+ networks:
60
+ rag-network:
61
+ driver: bridge