Subhakanta commited on
Commit ยท
4787e22
0
Parent(s):
Initial commit without data folder
Browse files- .dockerignore +11 -0
- .gitignore +30 -0
- Dockerfile +44 -0
- LICENSE +21 -0
- README.md +109 -0
- app.py +13 -0
- backend/api.py +33 -0
- backend/models.py +7 -0
- frontend/index.html +22 -0
- frontend/script.js +37 -0
- frontend/style.css +86 -0
- src/chatbot.py +148 -0
- src/check_index.py +25 -0
- src/ingest.py +86 -0
- src/query.py +47 -0
.dockerignore
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
| 4 |
+
*.pyd
|
| 5 |
+
.env
|
| 6 |
+
.venv
|
| 7 |
+
venv/
|
| 8 |
+
.env/
|
| 9 |
+
.git/
|
| 10 |
+
.gitignore
|
| 11 |
+
/data
|
.gitignore
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python Virtual Environments
|
| 2 |
+
rag_env/
|
| 3 |
+
venv/
|
| 4 |
+
env/
|
| 5 |
+
.venv/
|
| 6 |
+
|
| 7 |
+
# Secret Keys and Environment Variables
|
| 8 |
+
.env
|
| 9 |
+
|
| 10 |
+
# Generated Data & Vector Stores
|
| 11 |
+
vectorStore/
|
| 12 |
+
/backend/vectorStore/
|
| 13 |
+
data/
|
| 14 |
+
*.pdf
|
| 15 |
+
*.txt
|
| 16 |
+
# Python specific
|
| 17 |
+
__pycache__/
|
| 18 |
+
*.pyc
|
| 19 |
+
*.pyo
|
| 20 |
+
*.pyd
|
| 21 |
+
|
| 22 |
+
# IDE / Editor specific settings
|
| 23 |
+
# Ignore personal editor configurations
|
| 24 |
+
.vscode/
|
| 25 |
+
.idea/
|
| 26 |
+
|
| 27 |
+
# OS-specific files
|
| 28 |
+
# Ignore files generated by macOS and Windows
|
| 29 |
+
.DS_Store
|
| 30 |
+
Thumbs.db
|
Dockerfile
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ================================
# 1. Use an official Python runtime
# ================================
FROM python:3.11-slim

# Prevent Python from writing .pyc files & buffering stdout/stderr
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# ================================
# 2. Set working directory
# ================================
WORKDIR /app

# ================================
# 3. Install OS dependencies (if needed)
# ================================
# build-essential is required to compile any wheels without binary builds.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# ================================
# 4. Install Python dependencies
# ================================
# Copy only requirements first so this layer is cached across code changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# ================================
# 5. Copy project files
# ================================
# .dockerignore keeps data/, virtualenvs and .git out of the build context.
COPY . .

# ================================
# 6. Expose the FastAPI port
# ================================
EXPOSE 7860

# Launch the API with uvicorn on the port exposed above.
CMD ["uvicorn", "backend.api:app", "--host", "0.0.0.0", "--port", "7860"]
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 Subhakanta Rath
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ๐ Odisha Disaster Management RAG Chatbot
|
| 2 |
+
|
| 3 |
+
## ๐ Overview
|
| 4 |
+
Odisha faces recurring disasters every year such as **floods, cyclones, and droughts**.
|
| 5 |
+
While the state has a strong disaster management authority (OSDMA), information is often scattered across reports, research papers, and government documents.
|
| 6 |
+
|
| 7 |
+
This project builds a **Retrieval-Augmented Generation (RAG) based chatbot** that provides citizens, researchers, and policymakers with **clear, reliable, and contextual answers** related to Odishaโs disaster management practices.
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## โจ Features
|
| 12 |
+
- Handles **132 PDFs** and **12 text files** (OSDMA, IMD, NDMA, research papers).
|
| 13 |
+
- **Preprocessing pipeline**: PDF/text extraction, cleaning, normalization, chunking.
|
| 14 |
+
- **Embeddings** with `sentence-transformers/all-MiniLM-L6-v2`.
|
| 15 |
+
- **FAISS Vector Database** for fast and efficient retrieval.
|
| 16 |
+
- **RAG pipeline**:
|
| 17 |
+
1. User query โ query structuring (handles poor English, spelling issues).
|
| 18 |
+
2. Retrieve relevant chunks from FAISS.
|
| 19 |
+
3. If no relevant results โ no LLM call (saves cost).
|
| 20 |
+
4. If relevant โ LLM generates structured, contextual answers.
|
| 21 |
+
- **Prompt engineering** for better accuracy and reduced hallucinations.
|
| 22 |
+
- Backend: **FastAPI**.
|
| 23 |
+
- Frontend: **HTML, CSS, JS chatbot interface**.
|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
## ๐๏ธ Architecture
|
| 28 |
+
|
| 29 |
+
**User Query โ Query Structuring โ FAISS Retriever โ Relevant Chunks โ LLM โ Answer**
|
| 30 |
+
|
| 31 |
+
# ๐ ๏ธ Tech Stack
|
| 32 |
+
|
| 33 |
+
- **Python** (data handling & backend)
|
| 34 |
+
- **PyPDF, TextLoader** โ PDF/Text extraction
|
| 35 |
+
- **FAISS** โ Vector database
|
| 36 |
+
- **HuggingFace Sentence Transformers** โ Embeddings
|
| 37 |
+
- **FastAPI** โ Backend API
|
| 38 |
+
- **HTML, CSS, JavaScript** โ Frontend chatbot UI
|
| 39 |
+
- **LLM (OpenAI / HuggingFace)** โ Answer generation
|
| 40 |
+
|
| 41 |
+
---
|
| 42 |
+
|
| 43 |
+
## โ๏ธ Installation
|
| 44 |
+
|
| 45 |
+
### 1. Clone the repository
|
| 46 |
+
```bash
|
| 47 |
+
git clone https://github.com/subhakanta156/odisha-disaster-knowledge-assistant.git
|
| 48 |
+
```
|
| 49 |
+
### 2. Create virtual environment & install dependencies
|
| 50 |
+
```bash
|
| 51 |
+
python -m venv venv
|
| 52 |
+
source venv/bin/activate # Linux/Mac
|
| 53 |
+
venv\Scripts\activate # Windows
|
| 54 |
+
|
| 55 |
+
pip install -r requirements.txt
|
| 56 |
+
```
|
| 57 |
+
### 3. Prepare the data
|
| 58 |
+
- Place all PDFs/text files inside the data/ folder.
|
| 59 |
+
- Run preprocessing & embedding script:
|
| 60 |
+
```bash
|
| 61 |
+
python src/ingest.py
|
| 62 |
+
```
|
| 63 |
+
### 4. Run the FastAPI backend
|
| 64 |
+
```bash
|
| 65 |
+
uvicorn backend.api:app --reload
|
| 66 |
+
```
|
| 67 |
+
### 5. Open the frontend
|
| 68 |
+
- Open `frontend/index.html` in your browser.
|
| 69 |
+
|
| 70 |
+
## ๐ Usage
|
| 71 |
+
|
| 72 |
+
Ask questions like:
|
| 73 |
+
|
| 74 |
+
- โHow does Odishaโs disaster proneness compare with other Indian states?โ
|
| 75 |
+
- โProvide details of relief funds sanctioned for Odisha during the 1999 Super Cyclone.โ
|
| 76 |
+
- โWhich Odisha agency is primarily responsible for issuing cyclone alerts?โ
|
| 77 |
+
- โExplain the key steps taken by the Odisha government if lives are lost in a disaster?โ
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
The system retrieves relevant chunks from reports and generates reliable, structured answers.
|
| 81 |
+
|
| 82 |
+
---
|
| 83 |
+
|
| 84 |
+
## ๐ Optimizations
|
| 85 |
+
|
| 86 |
+
- Added query filtering โ No LLM call if retrieval fails (reduces cost).
|
| 87 |
+
- Handled poor English queries via query restructuring.
|
| 88 |
+
- Improved prompt engineering to minimize hallucinations.
|
| 89 |
+
|
| 90 |
+
---
|
| 91 |
+
|
| 92 |
+
## ๐ Future Improvements
|
| 93 |
+
|
| 94 |
+
- Add multilingual support (Odia/Hindi queries).
|
| 95 |
+
- Deploy on cloud (AWS/GCP/Azure) with Docker.
|
| 96 |
+
- Use advanced embeddings (e.g., `all-mpnet-base-v2`) for higher accuracy.
|
| 97 |
+
- Add real-time updates (e.g., cyclone alerts).
|
| 98 |
+
|
| 99 |
+
---
|
| 100 |
+
|
| 101 |
+
## ๐จโ๐ป Author
|
| 102 |
+
|
| 103 |
+
**Subhakanta Rath**
|
| 104 |
+
|
| 105 |
+
MSc AI & ML @ IIIT Lucknow
|
| 106 |
+
|
| 107 |
+
Passionate about AI/ML, Data Engineering
|
| 108 |
+
|
| 109 |
+
|
app.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# app.py — development entry point; launches the FastAPI app from backend/api.py.

import sys
from pathlib import Path

import uvicorn

# Ensure the project root is importable so "backend.api" resolves no matter
# which directory the script is launched from.
BASE_DIR = Path(__file__).resolve().parent
sys.path.append(str(BASE_DIR))

if __name__ == "__main__":
    # reload=True restarts the server on code changes (development only).
    uvicorn.run("backend.api:app", host="0.0.0.0", port=8000, reload=True)
backend/api.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import sys
import os

# Add the project root to sys.path so "backend" and "src" resolve when the
# module is imported from inside backend/.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from backend.models import ChatRequest, ChatResponse  # in backend/
from src.chatbot import RAGChatBot

app = FastAPI(title="Odisha Disaster Management Chatbot")

# Allow the static frontend (opened from file:// or another origin) to call
# this API.  NOTE(review): browsers ignore allow_credentials=True when
# allow_origins is the wildcard "*" — restrict origins if credentials are
# ever actually needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the chatbot once at import time (not per request): it loads the
# FAISS index and LLM client, which is far too expensive to repeat per call.
bot = RAGChatBot()

@app.post("/chat", response_model=ChatResponse)
def chat(request: ChatRequest) -> ChatResponse:
    """Answer a single user query through the RAG pipeline."""
    answer = bot.chat(request.query)
    return ChatResponse(answer=answer)

@app.get("/")
def root() -> dict:
    """Simple health-check endpoint."""
    return {"message": "Odisha Disaster Management Chatbot API is running"}
|
backend/models.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pydantic import BaseModel

class ChatRequest(BaseModel):
    """Request body for POST /chat: the user's raw question text."""
    query: str

class ChatResponse(BaseModel):
    """Response body for POST /chat: the chatbot's answer text."""
    answer: str
|
frontend/index.html
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Odisha Disaster Chatbot</title>
    <link rel="stylesheet" href="style.css">
</head>
<body>
    <!-- Single-page chat widget: header, scrolling message list, input row -->
    <div class="chat-container">
        <h2>Odisha Disaster Chatbot</h2>
        <!-- Messages are appended here by script.js addMessage() -->
        <div id="chatbox" class="chatbox"></div>
        <div class="input-area">
            <!-- Enter key submits, same as the Send button -->
            <input type="text" id="query" placeholder="Ask something..."
                   onkeydown="if(event.key==='Enter') askBot()" />
            <button onclick="askBot()">Send</button>
        </div>
    </div>
    <!-- marked.js renders the bot's Markdown answers -->
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <script src="script.js"></script>
</body>
</html>
|
frontend/script.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Send the current input to the backend /chat endpoint and render the reply.
async function askBot() {
    const input = document.getElementById("query");
    const query = input.value;
    if (!query.trim()) return;

    // Show the user's message immediately, then clear the field.
    addMessage(query, "user");
    input.value = "";

    try {
        const res = await fetch("http://127.0.0.1:8000/chat", {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ query: query })
        });

        const data = await res.json();

        // Show bot response
        addMessage(data.answer, "bot");
    } catch (error) {
        addMessage("⚠️ Unable to reach server. Please try again later.", "bot");
    }
}

// Append one chat bubble (sender: "user" or "bot") to the chat box.
function addMessage(text, sender) {
    const chatbox = document.getElementById("chatbox");
    const msg = document.createElement("div");
    msg.classList.add("message", sender);
    // Render the model's Markdown.  NOTE(review): marked.parse output is
    // injected as raw HTML — add sanitization (e.g. DOMPurify) if answers
    // can ever contain untrusted markup.
    msg.innerHTML = marked.parse(text);
    chatbox.appendChild(msg);

    // Keep the newest message in view.
    chatbox.scrollTop = chatbox.scrollHeight;
}
|
frontend/style.css
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Page layout: center the chat card on a light grey background. */
body {
    font-family: Arial, sans-serif;
    background: #f0f2f5;
    display: flex;
    justify-content: center;
    align-items: center;
    height: 100vh;
    margin: 0;
}

/* The chat card itself: fixed width, vertical flex column. */
.chat-container {
    width: 400px;
    background: #fff;
    border-radius: 12px;
    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
    display: flex;
    flex-direction: column;
    overflow: hidden;
}

/* Card header / title bar. */
.chat-container h2 {
    background: #007bff;
    color: white;
    padding: 15px;
    margin: 0;
    text-align: center;
}

/* Scrolling message list; flex:1 makes it absorb leftover height. */
.chatbox {
    flex: 1;
    padding: 15px;
    overflow-y: auto;
    display: flex;
    flex-direction: column;
    gap: 10px;
}

/* Shared bubble styling for both senders. */
.message {
    max-width: 75%;
    padding: 10px 15px;
    border-radius: 15px;
    word-wrap: break-word;
}

/* User bubbles: blue, right-aligned. */
.user {
    align-self: flex-end;
    background: #007bff;
    color: white;
    border-bottom-right-radius: 5px;
}

/* Bot bubbles: grey, left-aligned. */
.bot {
    align-self: flex-start;
    background: #e4e6eb;
    color: black;
    border-bottom-left-radius: 5px;
}

/* Bottom input row (text field + Send button). */
.input-area {
    display: flex;
    padding: 10px;
    border-top: 1px solid #ddd;
    background: #fafafa;
}

.input-area input {
    flex: 1;
    padding: 10px;
    border: 1px solid #ddd;
    border-radius: 8px;
    outline: none;
    margin-right: 10px;
}

.input-area button {
    padding: 10px 15px;
    border: none;
    border-radius: 8px;
    background: #007bff;
    color: white;
    cursor: pointer;
}

/* Darken the button on hover for affordance. */
.input-area button:hover {
    background: #0056b3;
}
|
src/chatbot.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
from typing import List
from dotenv import load_dotenv

from langchain_groq import ChatGroq
from langchain.schema import HumanMessage, AIMessage
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# ---------------------------
# Load environment variables
# ---------------------------
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# ---------------------------
# Settings / Tuning
# ---------------------------
DB_FAISS_PATH = "vectorStore"
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
K = 5                  # how many candidates to check in the in-domain pre-filter
MAX_DISTANCE = 1.0     # FAISS distance threshold (lower = more similar)
MAX_CHAT_HISTORY = 50  # cap on stored exchanges to avoid unbounded growth

# ---------------------------
# Load FAISS VectorStore (module-level: shared by all bot instances)
# ---------------------------
embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
db = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
print(f"FAISS index size: {db.index.ntotal}")

# ---------------------------
# ChatBot Class
# ---------------------------
class RAGChatBot:
    """RAG chatbot over the Odisha disaster-management FAISS index.

    Pipeline: LLM query rewrite -> FAISS distance pre-filter (skips the LLM
    for out-of-domain questions) -> RetrievalQA -> bounded chat memory.
    """

    def __init__(self):
        # Fail fast if the API key is missing rather than on the first call.
        if not GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY not set in environment")
        self.llm = ChatGroq(
            groq_api_key=GROQ_API_KEY,
            model="llama-3.1-8b-instant",
            temperature=0,  # deterministic answers
        )
        self.chat_history: List = []

        # Retriever used by RetrievalQA (we still pre-filter before calling it).
        self.retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})

        # Custom prompt with an explicit out-of-context fallback answer.
        custom_prompt = """
Use the following context to answer the user's question.
If the answer cannot be found in the context, reply exactly with:
"I'm trained only on Odisha disaster management reports (i.e,OSDMA, NDMA, IMD, Research papers). I don't have any information about: '{question}'"

Context:
{context}

Question:
{question}

Answer:
"""
        self.prompt = PromptTemplate(template=custom_prompt, input_variables=["context", "question"])

        # Retrieval QA chain (keeps structured QA behavior).
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=self.retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": self.prompt},
        )

    def rewrite_query(self, user_input: str) -> str:
        """Rewrite the query into formal disaster-management language via the LLM."""
        rewrite_prompt = f"""
Rewrite the following user query into clear, formal disaster management language
as used in government reports (OSDMA, NDMA, IMD).
If it is not disaster-related, just return it unchanged.

Query: {user_input}
"""
        try:
            response = self.llm.invoke([HumanMessage(content=rewrite_prompt)])
            return response.content.strip()
        except Exception as e:
            print("Rewrite error:", e)
            return user_input  # fall back to the raw query

    def _prefilter_by_distance(self, query: str, k: int = K, max_distance: float = MAX_DISTANCE) -> bool:
        """Return True when the best FAISS match is close enough to be in-domain."""
        results = db.similarity_search_with_score(query, k=k)
        if not results:
            return False
        best_score = results[0][1]  # results are (Document, distance) pairs
        return best_score <= max_distance

    def chat(self, user_input: str) -> str:
        """Answer one user message; updates bounded conversation memory."""
        # 1) Rewrite user query into report-style language.
        rewritten_query = self.rewrite_query(user_input)

        # 2) Quick in-domain pre-filter; fail open so a FAISS hiccup does not
        #    wrongly refuse an answerable question.
        try:
            in_domain = self._prefilter_by_distance(rewritten_query)
        except Exception as e:
            print("prefilter error:", e)
            in_domain = True

        if not in_domain:
            # Skip the LLM entirely for out-of-domain questions (saves cost).
            return (
                f"I'm trained only on Odisha disaster management reports "
                f"(OSDMA, NDMA, IMD, research). I don't have any information about: '{user_input}'."
            )

        # 3) Retrieval + QA.
        try:
            response = self.qa_chain.invoke({"query": rewritten_query})
            answer = response.get("result") if isinstance(response, dict) else str(response)
        except Exception as e:
            print("LLM / chain error:", e)
            answer = "Sorry, I encountered an error while generating the answer."

        # 4) Update memory (two messages per exchange, hard-capped).
        self.chat_history.append(HumanMessage(content=user_input))
        self.chat_history.append(AIMessage(content=answer))
        if len(self.chat_history) > MAX_CHAT_HISTORY * 2:
            self.chat_history = self.chat_history[-MAX_CHAT_HISTORY * 2 :]

        return answer


# ---------------------------
# Run Chatbot (CLI)
# ---------------------------
if __name__ == "__main__":
    bot = RAGChatBot()
    print("Odisha Disaster Management ChatBot ready! Type 'exit' to quit.")
    while True:
        query = input("You: ")
        if query.lower() in ["exit", "quit"]:
            break
        print("Bot:", bot.chat(query))
|
src/check_index.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Path of the saved vector store (built by src/ingest.py).
DB_FAISS_PATH = "vectorStore"

def check_faiss_index():
    """Print how many vectors and metadata entries the saved index holds.

    The two counts should match; a mismatch means index.faiss and index.pkl
    are out of sync and the store should be rebuilt.
    """
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)

    # Number of vectors stored in index.faiss.
    num_vectors = db.index.ntotal

    # Number of documents (with metadata) stored in index.pkl.
    # NOTE(review): docstore._dict is a private attribute — may break on
    # LangChain upgrades.
    num_docs = len(db.docstore._dict)

    print(f"index.faiss contains {num_vectors} vectors")
    print(f"index.pkl contains {num_docs} metadata entries")

if __name__ == "__main__":
    check_faiss_index()
src/ingest.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
from langchain_core.documents import Document
|
| 6 |
+
from langchain_community.document_loaders import PyPDFLoader, TextLoader
|
| 7 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 8 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 9 |
+
from langchain_community.vectorstores import FAISS
|
| 10 |
+
|
| 11 |
+
# Load environment variables from .env file
|
| 12 |
+
load_dotenv()
|
| 13 |
+
|
| 14 |
+
# Define the path for the FAISS vector store
|
| 15 |
+
DB_FAISS_PATH = 'vectorStore'
|
| 16 |
+
|
| 17 |
+
def clean_text(text: str) -> str:
    """Drop header/footer-like lines and normalize all whitespace.

    Lines with five or fewer alphabetic characters (page numbers, rules,
    stray footer fragments) are discarded; the survivors are joined and every
    run of whitespace is collapsed to a single space.

    Note: the original also pre-collapsed blank-line runs, but the final
    ``\\s+`` normalization subsumes that step, so it is omitted here.
    """
    kept = [line for line in text.split('\n')
            if sum(c.isalpha() for c in line) > 5]  # keep if more than 5 letters
    return re.sub(r'\s+', ' ', '\n'.join(kept)).strip()
|
| 28 |
+
|
| 29 |
+
def load_documents():
    """Load every PDF and .txt file under the data directory into Documents.

    PDFs go through PyPDFLoader; text files are read manually as UTF-8 so a
    bad encoding can be reported and skipped instead of aborting the run.
    NOTE(review): the path is '../data', so this script must be run from
    inside src/ — the README says data/ sits at the repo root; confirm.
    """
    data_dir = '../data'
    documents = []

    for root, _, files in os.walk(data_dir):
        for file in files:
            file_path = os.path.join(root, file)
            if file.lower().endswith('.pdf'):
                loader = PyPDFLoader(file_path)
                print(f"Loading PDF {file_path}")
                documents.extend(loader.load())
            elif file.lower().endswith('.txt'):
                print(f"Loading TXT {file_path}")
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        text = f.read()
                    documents.append(Document(page_content=text, metadata={"source": file_path}))
                except UnicodeDecodeError as e:
                    print(f"Skipping {file_path} due to encoding error: {e}")
            else:
                continue  # silently ignore other file types
    return documents
|
| 52 |
+
|
| 53 |
+
def create_vector_db():
    """Build and save the FAISS vector store from the data directory.

    Pipeline: load -> clean -> chunk -> embed -> save to DB_FAISS_PATH.
    Exits early (without creating a store) when no documents are found.
    """
    print("Step 1: Loading documents from the 'data/' directory...")
    documents = load_documents()

    if not documents:
        print("No documents found in the 'data' directory. Exiting.")
        return

    print(f"Loaded {len(documents)} document(s).")

    print("\nStep 2: Cleaning the text content...")
    for doc in documents:
        doc.page_content = clean_text(doc.page_content)
    print("Text cleaning complete.")

    print("\nStep 3: Splitting into chunks...")
    # 1000-char chunks with 100-char overlap keep sentences from being cut
    # cleanly at chunk boundaries without ballooning the index.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Created {len(chunks)} chunks.")

    print("\nStep 4: Creating embeddings with HuggingFace...")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    print("Step 5: Building FAISS index...")
    db = FAISS.from_documents(chunks, embeddings)
    db.save_local(DB_FAISS_PATH)

    print(f"\nIngestion complete! Vector store saved at '{DB_FAISS_PATH}'")

if __name__ == "__main__":
    create_vector_db()
|
src/query.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Ad-hoc retrieval sanity check against the saved FAISS store (no LLM)."""

from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Load embeddings and the vector store built by src/ingest.py.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local("vectorStore", embeddings, allow_dangerous_deserialization=True)

# Ask a query.
query = "how many cyclone struck odisha in 2015"
results = db.similarity_search_with_score(query, k=5)

# Keep only hits at or below the distance threshold
# (FAISS returns L2 distance: smaller = more similar).
THRESHOLD = 0.75

filtered = []
for doc, score in results:
    print(f"Retrieved (distance={score:.4f}): {doc.metadata}")  # debug
    if score <= THRESHOLD:
        filtered.append(doc)

if not filtered:
    answer = "I don't know. This information is not available in my knowledge base."
else:
    answer = "\n\n".join(doc.page_content for doc in filtered)

print(f"\nQuery: {query}")
print(f"\nAnswer:\n{answer}")
|