Spaces:
Runtime error
Runtime error
Upload 22 files
Browse files- .gitattributes +1 -0
- Dockerfile +23 -0
- data/info.txt +1289 -0
- local_faiss_index/index.faiss +3 -0
- local_faiss_index/index.pkl +3 -0
- main.py +37 -0
- requirements.txt +9 -0
- src/__init__.py +3 -0
- src/__pycache__/__init__.cpython-310.pyc +0 -0
- src/__pycache__/__init__.cpython-311.pyc +0 -0
- src/__pycache__/data_ingestion.cpython-310.pyc +0 -0
- src/__pycache__/data_ingestion.cpython-311.pyc +0 -0
- src/__pycache__/data_preprocessing.cpython-310.pyc +0 -0
- src/__pycache__/data_preprocessing.cpython-311.pyc +0 -0
- src/__pycache__/evaluation.cpython-310.pyc +0 -0
- src/__pycache__/retrival_generation.cpython-310.pyc +0 -0
- src/__pycache__/retrival_generation.cpython-311.pyc +0 -0
- src/data_ingestion.py +38 -0
- src/data_preprocessing.py +37 -0
- src/evaluation.py +109 -0
- src/retrival_generation.py +112 -0
- static/style.css +248 -0
- templates/index.html +104 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
local_faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
ENV PYTHONDONTWRITEBYTECODE=1
|
| 4 |
+
ENV PYTHONUNBUFFERED=1
|
| 5 |
+
ENV PYTHONPATH=/app/src
|
| 6 |
+
|
| 7 |
+
WORKDIR /app
|
| 8 |
+
|
| 9 |
+
RUN apt-get update && apt-get install -y \
|
| 10 |
+
build-essential \
|
| 11 |
+
libglib2.0-0 \
|
| 12 |
+
libsm6 \
|
| 13 |
+
libxext6 \
|
| 14 |
+
libxrender-dev \
|
| 15 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 16 |
+
|
| 17 |
+
COPY . .
|
| 18 |
+
|
| 19 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 20 |
+
|
| 21 |
+
EXPOSE 8000
|
| 22 |
+
|
| 23 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
data/info.txt
ADDED
|
@@ -0,0 +1,1289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
10th class Marks
|
| 2 |
+
|
| 3 |
+
**Board of Secondary Education
|
| 4 |
+
Telangana State, India**
|
| 5 |
+
|
| 6 |
+
**SECONDARY SCHOOL CERTIFICATE**
|
| 7 |
+
**REGULAR** PC/29/4222/04/256517/3
|
| 8 |
+
**TS-EE 524495**
|
| 9 |
+
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
**CERTIFIED THAT**
|
| 13 |
+
**KATTA SAI PRANAV REDDY**
|
| 14 |
+
**Father's Name:** KATTA SRINIVAS REDDY
|
| 15 |
+
**Mother's Name:** KATTA UMARANI
|
| 16 |
+
**Roll No.:** 1929100642
|
| 17 |
+
**Date of Birth:** 03/06/2003 (Zero Three June Two Zero Zero Three)
|
| 18 |
+
**School:** EKALAVYA FOUNDATION SCL NALGONDA, NALGONDA DISTRICT
|
| 19 |
+
**Medium:** ENGLISH
|
| 20 |
+
|
| 21 |
+
Has appeared and **PASSED SSC EXAMINATION** held in **MARCH–2019**
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
### **The Candidate Secured the Following Grade and Grade Points in Curricular Areas:**
|
| 25 |
+
|
| 26 |
+
| Subject | Grade FA | Grade SA | Overall Grade | Grade Point |
|
| 27 |
+
| ------------------------ | -------- | -------- | ------------- | ----------- |
|
| 28 |
+
| First Language (TELUGU) | A1 | A1 | A1 | 10 |
|
| 29 |
+
| Third Language (ENGLISH) | A1 | A2 | A1 | 10 |
|
| 30 |
+
| Mathematics | A1 | A1 | A1 | 10 |
|
| 31 |
+
| Science | A1 | A2 | A1 | 09 |
|
| 32 |
+
| Social Studies | A1 | A1 | A1 | 10 |
|
| 33 |
+
| Second Language (HINDI) | A1 | B1 | B1 | 08 |
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
### **Cumulative Grade Point Average (CGPA): 9.5**
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
### **CO-CURRICULAR AREAS:**
|
| 40 |
+
|
| 41 |
+
| Subject | Grade |
|
| 42 |
+
| ----------------------------- | ----- |
|
| 43 |
+
| Value Education & Life Skills | A+ |
|
| 44 |
+
| Art & Cultural Education | A+ |
|
| 45 |
+
| Work & Computer Education | A+ |
|
| 46 |
+
| Physical & Health Education | A+ |
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
**Head Master**
|
| 50 |
+
**EKALAVYA FOUNDATION SCHOOL**
|
| 51 |
+
**Nalgonda, Nalgonda**
|
| 52 |
+
**Date of Issue: 13th May, 2019**
|
| 53 |
+
|
| 54 |
+
**SECRETARY**
|
| 55 |
+
**Board of Secondary Education**
|
| 56 |
+
**Telangana State, Hyderabad**
|
| 57 |
+
|
| 58 |
+
---
|
| 59 |
+
|
| 60 |
+
12th class marks(Intermediate)
|
| 61 |
+
---
|
| 62 |
+
|
| 63 |
+
**Telangana State Board of Intermediate Education**
|
| 64 |
+
Vidya Bhavan, Nampally, Hyderabad - 500 001
|
| 65 |
+
|
| 66 |
+
**PASS CERTIFICATE-CUM-MEMORANDUM OF MARKS**
|
| 67 |
+
**This is to certify that**
|
| 68 |
+
**KATTA SAI PRANAV REDDY**
|
| 69 |
+
**Father’s Name:** KATTA SRINIVAS REDDY
|
| 70 |
+
**Mother’s Name:** KATTA UMARANI
|
| 71 |
+
**Registered Number:** 2158208799
|
| 72 |
+
**Month & Year of Exam:** MARCH 2021
|
| 73 |
+
**Medium:** ENGLISH
|
| 74 |
+
**Result:** A GRADE
|
| 75 |
+
has appeared for the Intermediate Public Examination held in March 2021 and passed in the following subjects:
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
### **Part - I**
|
| 79 |
+
|
| 80 |
+
**ENGLISH** – 98 / 100
|
| 81 |
+
**SANSKRIT** – 100 / 100
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
### **Part - II**
|
| 85 |
+
|
| 86 |
+
**HE** – 98 / 100
|
| 87 |
+
|
| 88 |
+
---
|
| 89 |
+
|
| 90 |
+
### **Part - III: Optional Subjects**
|
| 91 |
+
|
| 92 |
+
| Subject | Max Marks | Marks Obtained |
|
| 93 |
+
| -------------------- | --------- | -------------- |
|
| 94 |
+
| MATHEMATICS A | 75 | 75 |
|
| 95 |
+
| MATHEMATICS B | 75 | 75 |
|
| 96 |
+
| PHYSICS | 60 | 58 |
|
| 97 |
+
| PHYSICS PRACTICALS | 30 | 30 |
|
| 98 |
+
| CHEMISTRY | 60 | 60 |
|
| 99 |
+
| CHEMISTRY PRACTICALS | 30 | 30 |
|
| 100 |
+
|
| 101 |
+
---
|
| 102 |
+
|
| 103 |
+
### **Environmental Education:** QUALIFIED
|
| 104 |
+
|
| 105 |
+
### **Ethics and Human Values:** QUALIFIED
|
| 106 |
+
|
| 107 |
+
---
|
| 108 |
+
|
| 109 |
+
**Total Marks:**
|
| 110 |
+
In Figures: **982**
|
| 111 |
+
In Words: **NINE EIGHT TWO**
|
| 112 |
+
|
| 113 |
+
---
|
| 114 |
+
|
| 115 |
+
**Date:** 28-06-2021
|
| 116 |
+
**Signature of the Principal and College Seal:** Sri Chaitanya Jr. Kalasala
|
| 117 |
+
**Signature:** (Controller of Examinations)
|
| 118 |
+
|
| 119 |
+
---
|
| 120 |
+
|
| 121 |
+
Resume
|
| 122 |
+
---
|
| 123 |
+
**Katta Sai Pranav Reddy**
|
| 124 |
+
Email: [kattapranavreddy@gmail.com](mailto:kattapranavreddy@gmail.com)
|
| 125 |
+
GitHub: ka1817
|
| 126 |
+
LinkedIn: pranav-reddy-katta
|
| 127 |
+
|
| 128 |
+
---
|
| 129 |
+
|
| 130 |
+
### **Professional Summary**
|
| 131 |
+
|
| 132 |
+
AI and ML Engineer skilled in developing end-to-end machine learning and Generative AI solutions for real-world business challenges. Proficient in data preprocessing, exploratory data analysis, and building predictive models to deliver actionable insights. Experienced in leveraging advanced AI techniques and data-driven strategies to create scalable, impactful solutions.
|
| 133 |
+
|
| 134 |
+
---
|
| 135 |
+
|
| 136 |
+
### **Education**
|
| 137 |
+
|
| 138 |
+
* **Anurag University**, Hyderabad, India
|
| 139 |
+
B.Tech in Artificial Intelligence and Machine Learning; CGPA: 8.29
|
| 140 |
+
*09/2021 – 04/2025*
|
| 141 |
+
|
| 142 |
+
* **Sri Chaitanya Junior College**, Hyderabad, India
|
| 143 |
+
MPC (Maths, Physics, Chemistry); Percentage: 98%
|
| 144 |
+
*06/2019 – 05/2021*
|
| 145 |
+
|
| 146 |
+
---
|
| 147 |
+
|
| 148 |
+
### Experience
|
| 149 |
+
|
| 150 |
+
**iNeuron Intelligence Pvt. Ltd.** *(Remote)*
|
| 151 |
+
*Machine Learning Intern — 10/2024 – 11/2024*
|
| 152 |
+
|
| 153 |
+
* Conducted extensive data preprocessing and exploratory data analysis (EDA) on large customer datasets to identify key behavioral patterns and high-value customer segments.
|
| 154 |
+
* Developed and trained machine learning models for customer segmentation using clustering techniques such as K-Means and Hierarchical Clustering, enhancing marketing strategy alignment.
|
| 155 |
+
* Collaborated with cross-functional teams to interpret analytical insights and monitored model performance across different stages of the pipeline, ensuring accuracy and consistency.
|
| 156 |
+
* Delivered actionable recommendations based on statistical analysis and predictive modeling, supporting data-driven decision-making for targeted marketing campaigns.
|
| 157 |
+
|
| 158 |
+
**Unified Mentor Pvt. Ltd.** *(Remote)*
|
| 159 |
+
*Data Science Intern — 09/2024 – 10/2024*
|
| 160 |
+
|
| 161 |
+
* Developed and optimized machine learning models to predict employee attrition, enabling proactive retention strategies and improving workforce stability.
|
| 162 |
+
* Conducted comprehensive data preprocessing, feature engineering, and exploratory data analysis (EDA) to identify key factors influencing employee turnover.
|
| 163 |
+
* Delivered actionable insights and visualized patterns through dashboards and reports, supporting HR teams in making data-driven decisions.
|
| 164 |
+
* Presented findings to stakeholders, translating complex analytics into clear, strategic recommendations for reducing attrition risk.
|
| 165 |
+
|
| 166 |
+
---
|
| 167 |
+
|
| 168 |
+
### **Projects**
|
| 169 |
+
|
| 170 |
+
**BigBasket SmartCart – AI-Driven Shopping Assistant** *(06/2025 – 07/2025)* \[GitHub]
|
| 171 |
+
|
| 172 |
+
* Led development of an AI-driven shopping assistant using RAG, enabling natural language queries and semantic product search with 95% retrieval accuracy for real-time product recommendations.
|
| 173 |
+
* Developed a retrieval pipeline using the gte-small model, FAISS indexing, and Cross-Encoder reranking, which improved relevance score to 0.89 for intent-driven search results.
|
| 174 |
+
* Designed a modular architecture with FastAPI, HTML/CSS, and Docker, ensuring scalability and reduced response latency to \~2 seconds for seamless interactions.
|
| 175 |
+
* Implemented GitHub Actions for automated testing, Docker builds, and AWS EC2 deployment, which reduced deployment time by 40% and improved system reliability.
|
| 176 |
+
🛒 BigBasket SmartCart – AI Assistant for BigBasket Shopping
|
| 177 |
+
🧾 Introduction
|
| 178 |
+
The rapid evolution of AI technologies has created new opportunities for enhancing user experience in digital commerce. Leveraging state-of-the-art language models and retrieval systems, intelligent assistants can now understand complex queries, process vast amounts of product data, and deliver precise, context-aware responses. This project presents a scalable and robust AI-powered shopping assistant tailored for BigBasket's product ecosystem. Built using Retrieval-Augmented Generation (RAG), vector embeddings, and large language models (LLMs), the system enables efficient and intelligent product discovery through natural language interaction.
|
| 179 |
+
|
| 180 |
+
❗ Problem Statement
|
| 181 |
+
Online shoppers frequently seek personalized and context-specific product recommendations, such as identifying the best-rated skincare item at the lowest price. However, conventional search systems often fall short in understanding such nuanced queries, lacking the ability to interpret intent, compare attributes across products, and deliver concise, relevant results. This creates friction in the user journey, leading to suboptimal shopping experiences. There is a clear need for an intelligent assistant that can process natural language queries, reason over structured product data, and deliver accurate, insightful responses to aid decision-making.
|
| 182 |
+
|
| 183 |
+
Business Goal:
|
| 184 |
+
To enhance the shopping experience, boost conversion rates, and optimize search efficiency by enabling natural language-based product search that understands user intent and delivers context-aware, personalized recommendations.
|
| 185 |
+
|
| 186 |
+
💰 Business Impact (Revenue + Cost)
|
| 187 |
+
💸 1. Increased Conversion Rates (↑ Revenue)
|
| 188 |
+
|
| 189 |
+
• Users find relevant products faster, leading to more product views, cart adds, and purchases
|
| 190 |
+
|
| 191 |
+
• Personalized recommendations match buyer intent better than traditional search
|
| 192 |
+
|
| 193 |
+
• Better UX = lower drop-off rates
|
| 194 |
+
📈 Even a 1–2% uplift in conversions from improved product search can lead to significant revenue gains for a large marketplace like BigBasket.
|
| 195 |
+
|
| 196 |
+
📉 2. Reduced Customer Support Queries (↓ Cost)
|
| 197 |
+
|
| 198 |
+
• AI assistant can handle informational and product-related queries
|
| 199 |
+
|
| 200 |
+
• Reduces manual intervention, live chat support, and email volume
|
| 201 |
+
|
| 202 |
+
• More self-service = less operational overhead
|
| 203 |
+
⏱️ 3. Reduced Time-to-Purchase (↑ Efficiency)
|
| 204 |
+
|
| 205 |
+
• Customers make faster decisions because the assistant summarizes comparisons (e.g., price vs. rating trade-offs)
|
| 206 |
+
|
| 207 |
+
• This shortens the purchase journey and increases user satisfaction
|
| 208 |
+
🧪 4. Rapid Experimentation & Deployment (↓ Dev Costs)
|
| 209 |
+
|
| 210 |
+
• The project is modular, Dockerized, and CI/CD enabled → easier to iterate and deploy
|
| 211 |
+
|
| 212 |
+
• Can be extended to other verticals (electronics, fashion) or other marketplaces with minimal changes
|
| 213 |
+
🚀 Features
|
| 214 |
+
🔍 Natural Language Product Search Users can ask queries like "cheapest skin care with highest rating" or "best perfume under ₹500".
|
| 215 |
+
|
| 216 |
+
🧠 Query Rewriting with LLM Uses Groq LLMs (gemma2-9b-it) to refine user queries for more precise retrieval.
|
| 217 |
+
|
| 218 |
+
📄 Document Embedding & Vector Search Preprocessed BigBasket product data embedded with thenlper/gte-small and indexed using FAISS.
|
| 219 |
+
|
| 220 |
+
🤖 RAG Pipeline Uses llama3-70b-8192 model for final answer generation based on retrieved and reranked results.
|
| 221 |
+
|
| 222 |
+
🔁 Reranking with CrossEncoder Improves accuracy using cross-encoder/ms-marco-MiniLM-L-6-v2.
|
| 223 |
+
|
| 224 |
+
🌐 FastAPI Backend Easily accessible via localhost:8000 or deployed server.
|
| 225 |
+
|
| 226 |
+
🐳 Dockerized Build once, run anywhere. Fully containerized using Docker.
|
| 227 |
+
|
| 228 |
+
🚰 CI/CD with GitHub Actions Automated testing, image build, and push to DockerHub.
|
| 229 |
+
|
| 230 |
+
📜 Logging Logging implemented for each step in the pipeline for transparency and debugging.
|
| 231 |
+
|
| 232 |
+
🗂️ Folder Structure
|
| 233 |
+
BIGBASKET/
|
| 234 |
+
├── .github/
|
| 235 |
+
│ └── workflows/
|
| 236 |
+
│ └── ci-cd.yml
|
| 237 |
+
├── data/
|
| 238 |
+
│ └── BigBasket Products.csv
|
| 239 |
+
├── logs/
|
| 240 |
+
│ ├── data_ingestion.log
|
| 241 |
+
│ ├── data_preprocessing.log
|
| 242 |
+
│ ├── query_rewriting.log
|
| 243 |
+
│ └── retrieval_generation.log
|
| 244 |
+
├── src/
|
| 245 |
+
│ ├── utils/
|
| 246 |
+
│ │ └── logger.py
|
| 247 |
+
│ ├── __init__.py
|
| 248 |
+
│ ├── data_ingestion.py
|
| 249 |
+
│ ├── data_preprocessing.py
|
| 250 |
+
│ ├── query_rewritting.py
|
| 251 |
+
│ └── retrival_genaration.py
|
| 252 |
+
├── static/
|
| 253 |
+
│ └── css/
|
| 254 |
+
│ └── style.css
|
| 255 |
+
├── templates/
|
| 256 |
+
│ └── index.html
|
| 257 |
+
├── tests/
|
| 258 |
+
├── ui/
|
| 259 |
+
├── main.py
|
| 260 |
+
├── Dockerfile
|
| 261 |
+
├── requirements.txt
|
| 262 |
+
├── .env
|
| 263 |
+
├── .dockerignore
|
| 264 |
+
├── .gitignore
|
| 265 |
+
└── README.md
|
| 266 |
+
🧪 Local Development Setup
|
| 267 |
+
# Clone the repository
|
| 268 |
+
git clone https://github.com/ka1817/BigBasket-SmartCart-AI-Assistant-for-BigBasket-Shopping
|
| 269 |
+
cd BigBasket
|
| 270 |
+
|
| 271 |
+
# Create virtual environment
|
| 272 |
+
python -m venv venv
|
| 273 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 274 |
+
|
| 275 |
+
# Install dependencies
|
| 276 |
+
pip install -r requirements.txt
|
| 277 |
+
|
| 278 |
+
# Before Running the app set .env(environment variable GROQ_API_KEY)
|
| 279 |
+
uvicorn main:app --reload --port 8000
|
| 280 |
+
🐳 Docker Instructions
|
| 281 |
+
🔧 1. Pull Image
|
| 282 |
+
|
| 283 |
+
docker pull pranavreddy123/bigbasket-assistant:latest
|
| 284 |
+
🚀 2. Run the App (Detached Mode)
|
| 285 |
+
|
| 286 |
+
docker run -d -p 8000:8000 \
|
| 287 |
+
-e GROQ_API_KEY=create groq api from groq cloud \
|
| 288 |
+
pranavreddy123/bigbasket-assistant:latest
|
| 289 |
+
🌐 3. Access the App
|
| 290 |
+
|
| 291 |
+
http://localhost:8000
|
| 292 |
+
🤖 Example Usage
|
| 293 |
+
|
| 294 |
+
Query: "Which is the cheapest hair product with high rating?" Rewritten: "Find the most affordable hair care product with a high customer rating." Response: "Garlic Oil - Vegetarian Capsule 500 mg by Sri Sri Ayurveda is available at ₹220 with a 4.1 rating."
|
| 295 |
+
|
| 296 |
+
🛠️ GitHub Actions (CI/CD)
|
| 297 |
+
File: .github/workflows/ci-cd.yml
|
| 298 |
+
|
| 299 |
+
✅ CI-Test: Runs unit tests using pytest.
|
| 300 |
+
|
| 301 |
+
🐳 CD-Docker: Builds Docker image and pushes to DockerHub.
|
| 302 |
+
|
| 303 |
+
Triggered on push to main or pull request.
|
| 304 |
+
|
| 305 |
+
☁️ Deployment on Amazon EC2
|
| 306 |
+
1. Launch EC2 Instance (Ubuntu 20.04)
|
| 307 |
+
2. SSH into your instance
|
| 308 |
+
ssh -i "your-key.pem" ubuntu@your-ec2-ip
|
| 309 |
+
3. Install Docker
|
| 310 |
+
sudo apt update
|
| 311 |
+
sudo apt install docker.io -y
|
| 312 |
+
sudo systemctl start docker
|
| 313 |
+
sudo systemctl enable docker
|
| 314 |
+
4. Pull and Run Docker Image
|
| 315 |
+
docker pull pranavreddy123/bigbasket-assistant:latest
|
| 316 |
+
# Ensure your .env file is in the same directory, or create an API key using Groq Cloud and add it to the .env file
|
| 317 |
+
docker run -d --env-file .env -p 8000:8000 pranavreddy123/bigbasket-assistant:latest
|
| 318 |
+
Access your app via http://<your-ec2-public-ip>
|
| 319 |
+
🧠 Tech Stack
|
| 320 |
+
✅ LLMs: Groq (gemma2-9b-it, llama3-70b-8192)
|
| 321 |
+
|
| 322 |
+
✅ LangChain, FAISS, HuggingFace, CrossEncoder
|
| 323 |
+
|
| 324 |
+
✅ FastAPI
|
| 325 |
+
|
| 326 |
+
✅ Docker
|
| 327 |
+
|
| 328 |
+
✅ GitHub Actions
|
| 329 |
+
|
| 330 |
+
✅ AWS EC2
|
| 331 |
+
|
| 332 |
+
✅ HTML/CSS
|
| 333 |
+
|
| 334 |
+
🔗 Links
|
| 335 |
+
🔍 GitHub Repo: BigBasket-SmartCart-AI-Assistant-for-BigBasket-Shopping
|
| 336 |
+
|
| 337 |
+
🐳 DockerHub: pranavreddy123/bigbasket-assistant
|
| 338 |
+
|
| 339 |
+
🧑💻 Developed By
|
| 340 |
+
Pranav Reddy
|
| 341 |
+
|
| 342 |
+
**Netflix Customer Churn Prediction – End-to-End ML System** *(Personal Project)* \[GitHub]
|
| 343 |
+
|
| 344 |
+
* Developed a complete machine learning pipeline to predict customer churn, achieving 99% recall and 0.99 ROC AUC through feature engineering, hyperparameter tuning, and cross-validation.
|
| 345 |
+
* Performed in-depth EDA to identify key churn drivers such as low engagement, infrequent logins, and payment methods, improving model interpretability and business insights.
|
| 346 |
+
* Implemented reproducible MLOps workflows with data versioning using DVC and AWS S3, and tracked experiments, metrics, and model artifacts using MLflow.
|
| 347 |
+
* Designed and deployed a FastAPI-based REST API with HTML/CSS frontend for real-time predictions, containerized the application using Docker, and automated CI/CD using GitHub Actions for deployment on AWS EC2.
|
| 348 |
+
|
| 349 |
+
📊 Netflix Customer Churn Prediction
|
| 350 |
+
🚀 Project Overview
|
| 351 |
+
Netflix, like many subscription-based platforms, faces the challenge of customer churn. Retaining existing customers is significantly more cost-effective than acquiring new ones. This project delivers a full-scale machine learning solution to predict customer churn using behavioral and subscription data, from ingestion to deployment via a FastAPI interface.
|
| 352 |
+
|
| 353 |
+
This repository presents a production-grade, explainable, and reproducible ML pipeline with CI/CD, experiment tracking (MLflow), data versioning (DVC), and containerized deployment using Docker.
|
| 354 |
+
|
| 355 |
+
🎯 Problem Statement
|
| 356 |
+
Netflix seeks to proactively identify users likely to cancel their subscriptions. Predicting churn enables targeted interventions to retain users and minimize revenue loss.
|
| 357 |
+
|
| 358 |
+
Goal: Build an ML classification model that predicts churn based on customer behavior and plan details.
|
| 359 |
+
|
| 360 |
+
📌 Key Features Used
|
| 361 |
+
Feature Type Description
|
| 362 |
+
watch_hours Numerical Total hours watched
|
| 363 |
+
last_login_days Numerical Days since last login
|
| 364 |
+
number_of_profiles Numerical Total profiles under the account
|
| 365 |
+
avg_watch_time_per_day Numerical Daily average watch time
|
| 366 |
+
subscription_type Categorical Subscription level: Basic, Standard, Premium
|
| 367 |
+
payment_method Categorical Payment method: Credit Card, UPI, PayPal, etc.
|
| 368 |
+
churned Target 1 = Churned, 0 = Not churned
|
| 369 |
+
📊 Key EDA Insights
|
| 370 |
+
🔬 Feature Significance
|
| 371 |
+
Feature Test p-value Significant?
|
| 372 |
+
subscription_type Chi-Square 0.0000 ✅ Yes
|
| 373 |
+
payment_method Chi-Square 0.0000 ✅ Yes
|
| 374 |
+
number_of_profiles Chi-Square 0.0000 ✅ Yes
|
| 375 |
+
watch_hours Mann-Whitney U 0.0000 ✅ Yes
|
| 376 |
+
last_login_days Mann-Whitney U 0.0000 ✅ Yes
|
| 377 |
+
avg_watch_time_per_day Mann-Whitney U 0.0000 ✅ Yes
|
| 378 |
+
age Mann-Whitney U 0.7803 ❌ No
|
| 379 |
+
gender, region, device Chi-Square > 0.3 ❌ No
|
| 380 |
+
✅ These statistically significant features were included in the final model pipeline.
|
| 381 |
+
|
| 382 |
+
🏗️ Project Architecture
|
| 383 |
+
netflix-churn-prediction/
|
| 384 |
+
├── data/ # Raw and processed data
|
| 385 |
+
├── models/ # Trained model binaries
|
| 386 |
+
├── reports/ # Classification reports & plots
|
| 387 |
+
├── static/ # CSS
|
| 388 |
+
├── templates/ # HTML UI
|
| 389 |
+
├── src/
|
| 390 |
+
│ ├── data_ingestion.py # Load dataset
|
| 391 |
+
│ ├── data_preprocessing.py # Pipeline for scaling & encoding
|
| 392 |
+
│ └── model_training.py # ML training & evaluation
|
| 393 |
+
├── main.py # FastAPI backend
|
| 394 |
+
├── Dockerfile # Containerization
|
| 395 |
+
├── .dvc/ # DVC for data version control
|
| 396 |
+
├── .github/workflows/ # CI/CD GitHub Actions
|
| 397 |
+
└── README.md
|
| 398 |
+
⚙️ End-to-End ML Workflow
|
| 399 |
+
1️⃣ Data Ingestion
|
| 400 |
+
Loads .csv into DataFrame
|
| 401 |
+
Handles errors and logs shape/summary
|
| 402 |
+
2️⃣ Preprocessing
|
| 403 |
+
OneHotEncoding (categorical)
|
| 404 |
+
StandardScaler (numerical)
|
| 405 |
+
Uses ColumnTransformer for pipeline modularity
|
| 406 |
+
3️⃣ Model Training
|
| 407 |
+
Models: RandomForest, GradientBoosting, SVC
|
| 408 |
+
GridSearchCV for hyperparameter tuning
|
| 409 |
+
Model artifacts saved to models/
|
| 410 |
+
ROC curves + classification reports saved to reports/
|
| 411 |
+
4️⃣ MLflow Tracking ✅
|
| 412 |
+
Tracks experiment metadata, metrics, parameters
|
| 413 |
+
Stores models and artifacts
|
| 414 |
+
UI accessible at localhost:5000
|
| 415 |
+
🧪 Model Performance
|
| 416 |
+
Model Accuracy F1 Score ROC AUC (Test) ROC AUC (CV) Notes
|
| 417 |
+
Random Forest 0.99 0.99 0.9995 0.9987 ✅ Best overall【13†source】
|
| 418 |
+
Gradient Boosting 0.99 0.99 0.9989 0.9991 Robust & efficient【12†source】
|
| 419 |
+
SVC 0.93 0.93 0.9844 0.9822 Lightweight【14†source】
|
| 420 |
+
🌐 FastAPI Deployment
|
| 421 |
+
🔧 API Endpoints:
|
| 422 |
+
/: HTML frontend form for manual input
|
| 423 |
+
/api/predict: JSON-based API for programmatic inference
|
| 424 |
+
🔌 Model Used:
|
| 425 |
+
Random Forest (best AUC + accuracy)
|
| 426 |
+
Accepts form or JSON input
|
| 427 |
+
Returns churn prediction + confidence
|
| 428 |
+
🐳 Docker Setup
|
| 429 |
+
FROM python:3.10-slim
|
| 430 |
+
WORKDIR /app
|
| 431 |
+
COPY requirements.txt .
|
| 432 |
+
RUN pip install -r requirements.txt
|
| 433 |
+
COPY . .
|
| 434 |
+
EXPOSE 8000
|
| 435 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
| 436 |
+
Run locally:
|
| 437 |
+
|
| 438 |
+
docker build -t netflix-churn .
|
| 439 |
+
docker run -p 8000:8000 netflix-churn
|
| 440 |
+
🔁 CI/CD Pipeline (GitHub Actions)
|
| 441 |
+
✅ Stages:
|
| 442 |
+
Test Phase
|
| 443 |
+
|
| 444 |
+
Install dependencies
|
| 445 |
+
Run pytest on unit tests
|
| 446 |
+
Pull versioned data using dvc pull
|
| 447 |
+
Build Phase
|
| 448 |
+
|
| 449 |
+
Docker image build with CACHEBUST arg
|
| 450 |
+
Push to DockerHub using GitHub Secrets
|
| 451 |
+
Deploy Phase
|
| 452 |
+
|
| 453 |
+
SSH into EC2 instance
|
| 454 |
+
Stop, remove old container
|
| 455 |
+
Pull and launch updated Docker image
|
| 456 |
+
🔐 GitHub Repository Secrets
|
| 457 |
+
Name Purpose
|
| 458 |
+
AWS_ACCESS_KEY_ID AWS auth for DVC S3
|
| 459 |
+
AWS_SECRET_ACCESS_KEY AWS auth for DVC S3
|
| 460 |
+
DOCKER_USERNAME DockerHub username for push
|
| 461 |
+
DOCKER_PASSWORD DockerHub password/token
|
| 462 |
+
EC2_HOST Public IP/DNS of EC2 instance
|
| 463 |
+
EC2_USER SSH user for EC2 login
|
| 464 |
+
EC2_SSH_KEY Private SSH key for GitHub Actions
|
| 465 |
+
🧬 Data Versioning with DVC
|
| 466 |
+
Tracks raw and preprocessed data versions
|
| 467 |
+
Uses .dvc/config to connect to AWS S3 remote
|
| 468 |
+
Run dvc push and dvc pull to sync across environments
|
| 469 |
+
Ensures reproducibility in CI and local experiments
|
| 470 |
+
📌 Business Value & Insights
|
| 471 |
+
🧠 High-risk churn users are linked to:
|
| 472 |
+
|
| 473 |
+
Low engagement (low watch hours)
|
| 474 |
+
Infrequent logins
|
| 475 |
+
Basic plans & non-card payments
|
| 476 |
+
📈 Operational Benefits:
|
| 477 |
+
|
| 478 |
+
Preemptive retention campaigns
|
| 479 |
+
Personalized offers to vulnerable users
|
| 480 |
+
Reduce marketing costs via targeted outreach
|
| 481 |
+
✅ Run Locally (No Docker)
|
| 482 |
+
git clone <repo_url>
|
| 483 |
+
cd netflix-churn-prediction
|
| 484 |
+
python src/model_training.py # Train all models
|
| 485 |
+
uvicorn main:app --reload # Launch API server
|
| 486 |
+
Summary
|
| 487 |
+
Component Implemented Tool/Service Used
|
| 488 |
+
Data Versioning ✅ DVC with AWS S3 remote
|
| 489 |
+
Data Ingestion ✅ pandas, custom Python class
|
| 490 |
+
Data Preprocessing ✅ scikit-learn Pipelines
|
| 491 |
+
Model Training ✅ scikit-learn, GridSearchCV
|
| 492 |
+
Experiment Tracking ✅ MLflow (local server: 127.0.0.1:5000)
|
| 493 |
+
Model Evaluation ✅ classification_report, ROC AUC
|
| 494 |
+
Model Packaging ✅ joblib for serialization
|
| 495 |
+
API Deployment ✅ FastAPI on AWS EC2
|
| 496 |
+
Web UI ✅ HTML + Bootstrap via Jinja2
|
| 497 |
+
Containerization ✅ Docker (with Dockerfile)
|
| 498 |
+
CI/CD Pipeline ✅ GitHub Actions
|
| 499 |
+
Cloud Hosting ✅ AWS EC2, SSH-based deployment
|
| 500 |
+
Secrets Management ✅ GitHub Secrets
|
| 501 |
+
Testing ✅ pytest, CI-tested
|
| 502 |
+
🙌 Author
|
| 503 |
+
👨💻 Katta Sai Pranav Reddy
|
| 504 |
+
📎 Tech Stack
|
| 505 |
+
Python 3.10
|
| 506 |
+
Scikit-learn, MLflow, DVC, FastAPI, Docker
|
| 507 |
+
GitHub Actions, AWS EC2, S3 Remote Storage
|
| 508 |
+
---
|
| 509 |
+
|
| 510 |
+
### **Skills**
|
| 511 |
+
|
| 512 |
+
* **Tools:** MLflow, DVC, Docker, Git, GitHub Actions, AWS (EC2, S3, ECR), FAISS, Pinecone, Hugging Face, LangChain, LangSmith, FastAPI
|
| 513 |
+
* **Programming & Technical Skills:** Python, SQL, HTML, CSS, Scikit-learn, TensorFlow, Keras, Statistics
|
| 514 |
+
* **Data Science & Machine Learning:** Data Preprocessing, EDA, Feature Engineering, Model Training & Evaluation, Hyperparameter Tuning, Clustering, MLOps, Semantic Search, Retrieval-Augmented Generation (RAG), CNN, RNN, GPT, Transformers, Fine-Tuning, Prompt Engineering
|
| 515 |
+
* **Data Visualization & Analysis:** Pandas, NumPy, Matplotlib, Seaborn
|
| 516 |
+
|
| 517 |
+
---
|
| 518 |
+
|
| 519 |
+
hobbies section
|
| 520 |
+
|
| 521 |
+
---
|
| 522 |
+
|
| 523 |
+
### **Hobbies & Interests**
|
| 524 |
+
Hobbies & Interests
|
| 525 |
+
|
| 526 |
+
* Playing Cricket
|
| 527 |
+
* Watching Football
|
| 528 |
+
* Reading Books
|
| 529 |
+
* Exploring Latest Advancements in Artificial Intelligence
|
| 530 |
+
* Browsing the Internet for Tech & Knowledge Updates
|
| 531 |
+
|
| 532 |
+
---
|
| 533 |
+
|
| 534 |
+
### Contact Information
|
| 535 |
+
|
| 536 |
+
Contact Information
|
| 537 |
+
|
| 538 |
+
📞 **Phone:** +91 93475 41040
|
| 539 |
+
📧 **Email:** [kattapranavreddy@gmail.com](mailto:kattapranavreddy@gmail.com)
|
| 540 |
+
💻 **GitHub:** [github.com/ka1817](https://github.com/ka1817)
|
| 541 |
+
🔗 **LinkedIn:** [linkedin.com/in/pranav-reddy-katta](https://www.linkedin.com/in/pranav-reddy-katta/)
|
| 542 |
+
---
|
| 543 |
+
|
| 544 |
+
10th
|
| 545 |
+
---
|
| 546 |
+
|
| 547 |
+
**Board of Secondary Education
|
| 548 |
+
Telangana State, India**
|
| 549 |
+
|
| 550 |
+
**SECONDARY SCHOOL CERTIFICATE**
|
| 551 |
+
**REGULAR** PC/29/4222/04/256517/3
|
| 552 |
+
**TS-EE 524495**
|
| 553 |
+
|
| 554 |
+
---
|
| 555 |
+
|
| 556 |
+
**CERTIFIED THAT**
|
| 557 |
+
**KATTA SAI PRANAV REDDY**
|
| 558 |
+
**Father's Name:** KATTA SRINIVAS REDDY
|
| 559 |
+
**Mother's Name:** KATTA UMARANI
|
| 560 |
+
**Roll No.:** 1929100642
|
| 561 |
+
**Date of Birth:** 03/06/2003 (Zero Three June Two Zero Zero Three)
|
| 562 |
+
**School:** EKALAVYA FOUNDATION SCL NALGONDA, NALGONDA DISTRICT
|
| 563 |
+
**Medium:** ENGLISH
|
| 564 |
+
|
| 565 |
+
Has appeared and **PASSED SSC EXAMINATION** held in **MARCH–2019**
|
| 566 |
+
|
| 567 |
+
---
|
| 568 |
+
|
| 569 |
+
### **The Candidate Secured the Following Grade and Grade Points in Curricular Areas:**
|
| 570 |
+
|
| 571 |
+
| Subject | Grade FA | Grade SA | Overall Grade | Grade Point |
|
| 572 |
+
| ------------------------ | -------- | -------- | ------------- | ----------- |
|
| 573 |
+
| First Language (TELUGU) | A1 | A1 | A1 | 10 |
|
| 574 |
+
| Third Language (ENGLISH) | A1 | A2 | A1 | 10 |
|
| 575 |
+
| Mathematics | A1 | A1 | A1 | 10 |
|
| 576 |
+
| Science | A1 | A2 | A1 | 09 |
|
| 577 |
+
| Social Studies | A1 | A1 | A1 | 10 |
|
| 578 |
+
| Second Language (HINDI) | A1 | B1 | B1 | 08 |
|
| 579 |
+
|
| 580 |
+
---
|
| 581 |
+
|
| 582 |
+
### **Cumulative Grade Point Average (CGPA): 9.5**
|
| 583 |
+
|
| 584 |
+
---
|
| 585 |
+
|
| 586 |
+
### **CO-CURRICULAR AREAS:**
|
| 587 |
+
|
| 588 |
+
| Subject | Grade |
|
| 589 |
+
| ----------------------------- | ----- |
|
| 590 |
+
| Value Education & Life Skills | A+ |
|
| 591 |
+
| Art & Cultural Education | A+ |
|
| 592 |
+
| Work & Computer Education | A+ |
|
| 593 |
+
| Physical & Health Education | A+ |
|
| 594 |
+
|
| 595 |
+
---
|
| 596 |
+
|
| 597 |
+
### **Marks of Identification:**
|
| 598 |
+
|
| 599 |
+
1. A MOLE ON THE LEFT HAND RING FINGER
|
| 600 |
+
2. A MOLE ON THE RIGHT ELBOW
|
| 601 |
+
|
| 602 |
+
---
|
| 603 |
+
|
| 604 |
+
**Head Master**
|
| 605 |
+
**EKALAVYA FOUNDATION SCHOOL**
|
| 606 |
+
**Nalgonda, Nalgonda**
|
| 607 |
+
**Date of Issue: 13th May, 2019**
|
| 608 |
+
|
| 609 |
+
---
|
| 610 |
+
|
| 611 |
+
**SECRETARY**
|
| 612 |
+
**Board of Secondary Education**
|
| 613 |
+
**Telangana State, Hyderabad**
|
| 614 |
+
|
| 615 |
+
**Aadhaar No.:** 774291627518
|
| 616 |
+
|
| 617 |
+
---
|
| 618 |
+
|
| 619 |
+
12th(Intermediate)
|
| 620 |
+
---
|
| 621 |
+
|
| 622 |
+
**Telangana State Board of Intermediate Education**
|
| 623 |
+
Vidya Bhavan, Nampally, Hyderabad - 500 001
|
| 624 |
+
|
| 625 |
+
**PASS CERTIFICATE-CUM-MEMORANDUM OF MARKS**
|
| 626 |
+
**This is to certify that**
|
| 627 |
+
**KATTA SAI PRANAV REDDY**
|
| 628 |
+
**Father’s Name:** KATTA SRINIVAS REDDY
|
| 629 |
+
**Mother’s Name:** KATTA UMARANI
|
| 630 |
+
**Registered Number:** 2158208799
|
| 631 |
+
**Month & Year of Exam:** MARCH 2021
|
| 632 |
+
**Medium:** ENGLISH
|
| 633 |
+
**Result:** A GRADE
|
| 634 |
+
has appeared for the Intermediate Public Examination held in March 2021 and passed in the following subjects:
|
| 635 |
+
|
| 636 |
+
---
|
| 637 |
+
|
| 638 |
+
### **Part - I**
|
| 639 |
+
|
| 640 |
+
**ENGLISH** – 98 / 100
|
| 641 |
+
**SANSKRIT** – 100 / 100
|
| 642 |
+
|
| 643 |
+
---
|
| 644 |
+
|
| 645 |
+
### **Part - II**
|
| 646 |
+
|
| 647 |
+
**HE** – 98 / 100
|
| 648 |
+
|
| 649 |
+
---
|
| 650 |
+
|
| 651 |
+
### **Part - III: Optional Subjects**
|
| 652 |
+
|
| 653 |
+
| Subject | Max Marks | Marks Obtained |
|
| 654 |
+
| -------------------- | --------- | -------------- |
|
| 655 |
+
| MATHEMATICS A | 75 | 75 |
|
| 656 |
+
| MATHEMATICS B | 75 | 75 |
|
| 657 |
+
| PHYSICS | 60 | 58 |
|
| 658 |
+
| PHYSICS PRACTICALS | 30 | 30 |
|
| 659 |
+
| CHEMISTRY | 60 | 60 |
|
| 660 |
+
| CHEMISTRY PRACTICALS | 30 | 30 |
|
| 661 |
+
|
| 662 |
+
---
|
| 663 |
+
|
| 664 |
+
### **Environmental Education:** QUALIFIED
|
| 665 |
+
|
| 666 |
+
### **Ethics and Human Values:** QUALIFIED
|
| 667 |
+
|
| 668 |
+
---
|
| 669 |
+
|
| 670 |
+
**Total Marks:**
|
| 671 |
+
In Figures: **982**
|
| 672 |
+
In Words: **NINE EIGHT TWO**
|
| 673 |
+
|
| 674 |
+
---
|
| 675 |
+
|
| 676 |
+
**Date:** 28-06-2021
|
| 677 |
+
**Signature of the Principal and College Seal:** Sri Chaitanya Jr. Kalasala
|
| 678 |
+
**Signature:** (Controller of Examinations)
|
| 679 |
+
|
| 680 |
+
---
|
| 681 |
+
|
| 682 |
+
Resume
|
| 683 |
+
---
|
| 684 |
+
**Katta Sai Pranav Reddy**
|
| 685 |
+
Email: [kattapranavreddy@gmail.com](mailto:kattapranavreddy@gmail.com)
|
| 686 |
+
GitHub: ka1817
|
| 687 |
+
LinkedIn: pranav-reddy-katta
|
| 688 |
+
|
| 689 |
+
---
|
| 690 |
+
|
| 691 |
+
### **Professional Summary**
|
| 692 |
+
|
| 693 |
+
AI and ML Engineer skilled in developing end-to-end machine learning and Generative AI solutions for real-world business challenges. Proficient in data preprocessing, exploratory data analysis, and building predictive models to deliver actionable insights. Experienced in leveraging advanced AI techniques and data-driven strategies to create scalable, impactful solutions.
|
| 694 |
+
|
| 695 |
+
---
|
| 696 |
+
|
| 697 |
+
# Education
|
| 698 |
+
|
| 699 |
+
* **Anurag University**, Hyderabad, India
|
| 700 |
+
B.Tech in Artificial Intelligence and Machine Learning; CGPA: 8.29
|
| 701 |
+
*09/2021 – 04/2025*
|
| 702 |
+
|
| 703 |
+
* **Sri Chaitanya Junior College**, Hyderabad, India
|
| 704 |
+
MPC (Maths, Physics, Chemistry); Percentage: 98%
|
| 705 |
+
*06/2019 – 05/2021*
|
| 706 |
+
|
| 707 |
+
-----------------------------------------
|
| 708 |
+
|
| 709 |
+
### Pranav Work Experience
|
| 710 |
+
|
| 711 |
+
Work Experience
|
| 712 |
+
|
| 713 |
+
iNeuron Intelligence Pvt. Ltd.(Remote)
|
| 714 |
+
Machine Learning Intern — 10/2024 – 11/2024
|
| 715 |
+
|
| 716 |
+
1. Conducted extensive data preprocessing and exploratory data analysis (EDA) on large customer datasets to identify key behavioral patterns and high-value customer segments.
|
| 717 |
+
2. Developed and trained machine learning models for customer segmentation using clustering techniques such as K-Means and Hierarchical Clustering, enhancing marketing strategy alignment.
|
| 718 |
+
3. Collaborated with cross-functional teams to interpret analytical insights and monitored model performance across different stages of the pipeline, ensuring accuracy and consistency.
|
| 719 |
+
4. Delivered actionable recommendations based on statistical analysis and predictive modeling, supporting data-driven decision-making for targeted marketing campaigns.
|
| 720 |
+
|
| 721 |
+
Unified Mentor Pvt. Ltd.(Remote)
|
| 722 |
+
Data Science Intern — 09/2024 – 10/2024
|
| 723 |
+
1. Developed and optimized machine learning models to predict employee attrition, enabling proactive retention strategies and improving workforce stability.
|
| 724 |
+
2. Conducted comprehensive data preprocessing, feature engineering, and exploratory data analysis (EDA) to identify key factors influencing employee turnover.
|
| 725 |
+
3. Delivered actionable insights and visualized patterns through dashboards and reports, supporting HR teams in making data-driven decisions.
|
| 726 |
+
4. Presented findings to stakeholders, translating complex analytics into clear, strategic recommendations for reducing attrition risk.
|
| 727 |
+
|
| 728 |
+
|
| 729 |
+
-------------------------
|
| 730 |
+
|
| 731 |
+
# Projects
|
| 732 |
+
|
| 733 |
+
Pranav Reddy's Projects
|
| 734 |
+
|
| 735 |
+
1. 🛒 BigBasket SmartCart – AI Assistant for BigBasket Shopping
|
| 736 |
+
---
|
| 737 |
+
## 🧾 Introduction
|
| 738 |
+
|
| 739 |
+
The rapid evolution of AI technologies has created new opportunities for enhancing user experience in digital commerce. Leveraging state-of-the-art language models and retrieval systems, intelligent assistants can now understand complex queries, process vast amounts of product data, and deliver precise, context-aware responses. This project presents a scalable and robust AI-powered shopping assistant tailored for BigBasket's product ecosystem. Built using Retrieval-Augmented Generation (RAG), vector embeddings, and large language models (LLMs), the system enables efficient and intelligent product discovery through natural language interaction.
|
| 740 |
+
|
| 741 |
+
---
|
| 742 |
+
|
| 743 |
+
## ❗ Problem Statement
|
| 744 |
+
|
| 745 |
+
Online shoppers frequently seek personalized and context-specific product recommendations, such as identifying the best-rated skincare item at the lowest price. However, conventional search systems often fall short in understanding such nuanced queries, lacking the ability to interpret intent, compare attributes across products, and deliver concise, relevant results. This creates friction in the user journey, leading to suboptimal shopping experiences. There is a clear need for an intelligent assistant that can process natural language queries, reason over structured product data, and deliver accurate, insightful responses to aid decision-making.
|
| 746 |
+
|
| 747 |
+
---
|
| 748 |
+
|
| 749 |
+
## Business Goal:
|
| 750 |
+
|
| 751 |
+
To enhance the shopping experience, boost conversion rates, and optimize search efficiency by enabling natural language-based product search that understands user intent and delivers context-aware, personalized recommendations.
|
| 752 |
+
|
| 753 |
+
|
| 754 |
+
|
| 755 |
+
## 💰 Business Impact (Revenue + Cost)
|
| 756 |
+
|
| 757 |
+
💸 1. Increased Conversion Rates (↑ Revenue)
|
| 758 |
+
|
| 759 |
+
• Users find relevant products faster, leading to more product views, cart adds, and purchases
|
| 760 |
+
|
| 761 |
+
• Personalized recommendations match buyer intent better than traditional search
|
| 762 |
+
|
| 763 |
+
• Better UX = lower drop-off rates
|
| 764 |
+
|
| 765 |
+
📈 Even a 1–2% uplift in conversions from improved product search can lead to significant revenue gains for a large marketplace like BigBasket.
|
| 766 |
+
|
| 767 |
+
📉 2. Reduced Customer Support Queries (↓ Cost)
|
| 768 |
+
|
| 769 |
+
• AI assistant can handle informational and product-related queries
|
| 770 |
+
|
| 771 |
+
• Reduces manual intervention, live chat support, and email volume
|
| 772 |
+
|
| 773 |
+
• More self-service = less operational overhead
|
| 774 |
+
|
| 775 |
+
⏱️ 3. Reduced Time-to-Purchase (↑ Efficiency)
|
| 776 |
+
|
| 777 |
+
• Customers make faster decisions because the assistant summarizes comparisons (e.g., price vs. rating trade-offs)
|
| 778 |
+
|
| 779 |
+
• This shortens the purchase journey and increases user satisfaction
|
| 780 |
+
|
| 781 |
+
🧪 4. Rapid Experimentation & Deployment (↓ Dev Costs)
|
| 782 |
+
|
| 783 |
+
• The project is modular, Dockerized, and CI/CD enabled → easier to iterate and deploy
|
| 784 |
+
|
| 785 |
+
• Can be extended to other verticals (electronics, fashion) or other marketplaces with minimal changes
|
| 786 |
+
|
| 787 |
+
---
|
| 788 |
+
|
| 789 |
+
## 🚀 Features
|
| 790 |
+
|
| 791 |
+
🔍 Natural Language Product Search
|
| 792 |
+
Users can ask queries like "cheapest skin care with highest rating" or "best perfume under ₹500".
|
| 793 |
+
|
| 794 |
+
🧠 Query Rewriting with LLM
|
| 795 |
+
Uses Groq LLMs (gemma2-9b-it) to refine user queries for more precise retrieval.
|
| 796 |
+
|
| 797 |
+
📄 Document Embedding & Vector Search
|
| 798 |
+
Preprocessed BigBasket product data embedded with thenlper/gte-small and indexed using FAISS.
|
| 799 |
+
|
| 800 |
+
🤖 RAG Pipeline
|
| 801 |
+
Uses llama3-70b-8192 model for final answer generation based on retrieved and reranked results.
|
| 802 |
+
|
| 803 |
+
🔁 Reranking with CrossEncoder
|
| 804 |
+
Improves accuracy using cross-encoder/ms-marco-MiniLM-L-6-v2.
|
| 805 |
+
|
| 806 |
+
🌐 FastAPI Backend
|
| 807 |
+
Easily accessible via localhost:8000 or deployed server.
|
| 808 |
+
|
| 809 |
+
🐳 Dockerized
|
| 810 |
+
Build once, run anywhere. Fully containerized using Docker.
|
| 811 |
+
|
| 812 |
+
🔁 CI/CD with GitHub Actions
|
| 813 |
+
Automated testing, image build, and push to DockerHub.
|
| 814 |
+
|
| 815 |
+
📜 Logging
|
| 816 |
+
Logging implemented for each step in the pipeline for transparency and debugging.
|
| 817 |
+
|
| 818 |
+
---
|
| 819 |
+
|
| 820 |
+
## 🗂️ Folder Structure
|
| 821 |
+
|
| 822 |
+
```bash
|
| 823 |
+
BIGBASKET/
|
| 824 |
+
├── .github/
|
| 825 |
+
│ └── workflows/
|
| 826 |
+
│ └── ci-cd.yml
|
| 827 |
+
├── data/
|
| 828 |
+
│ └── BigBasket Products.csv
|
| 829 |
+
├── logs/
|
| 830 |
+
│ ├── data_ingestion.log
|
| 831 |
+
│ ├── data_preprocessing.log
|
| 832 |
+
│ ├── query_rewriting.log
|
| 833 |
+
│ └── retrieval_generation.log
|
| 834 |
+
├── src/
|
| 835 |
+
│ ├── utils/
|
| 836 |
+
│ │ └── logger.py
|
| 837 |
+
│ ├── __init__.py
|
| 838 |
+
│ ├── data_ingestion.py
|
| 839 |
+
│ ├── data_preprocessing.py
|
| 840 |
+
│ ├── query_rewritting.py
|
| 841 |
+
│ └── retrival_genaration.py
|
| 842 |
+
├── static/
|
| 843 |
+
│ └── css/
|
| 844 |
+
│ └── style.css
|
| 845 |
+
├── templates/
|
| 846 |
+
│ └── index.html
|
| 847 |
+
├── tests/
|
| 848 |
+
├── ui/
|
| 849 |
+
├── main.py
|
| 850 |
+
├── Dockerfile
|
| 851 |
+
├── requirements.txt
|
| 852 |
+
├── .env
|
| 853 |
+
├── .dockerignore
|
| 854 |
+
├── .gitignore
|
| 855 |
+
└── README.md
|
| 856 |
+
```
|
| 857 |
+
|
| 858 |
+
---
|
| 859 |
+
|
| 860 |
+
# 🧪 Local Development Setup
|
| 861 |
+
|
| 862 |
+
```bash
|
| 863 |
+
# Clone the repository
|
| 864 |
+
git clone https://github.com/ka1817/BigBasket-SmartCart-AI-Assistant-for-BigBasket-Shopping
|
| 865 |
+
cd BigBasket
|
| 866 |
+
|
| 867 |
+
# Create virtual environment
|
| 868 |
+
python -m venv venv
|
| 869 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 870 |
+
|
| 871 |
+
# Install dependencies
|
| 872 |
+
pip install -r requirements.txt
|
| 873 |
+
|
| 874 |
+
# Before Running the app set .env(environment variable GROQ_API_KEY)
|
| 875 |
+
uvicorn main:app --reload --port 8000
|
| 876 |
+
```
|
| 877 |
+
|
| 878 |
+
## 🐳 Docker Instructions
|
| 879 |
+
|
| 880 |
+
🔧 1. Pull Image
|
| 881 |
+
|
| 882 |
+
```bash
|
| 883 |
+
docker pull pranavreddy123/bigbasket-assistant:latest
|
| 884 |
+
```
|
| 885 |
+
|
| 886 |
+
🚀 2. Run the App (Detached Mode)
|
| 887 |
+
|
| 888 |
+
```bash
|
| 889 |
+
docker run -d -p 8000:8000 \
|
| 890 |
+
  -e GROQ_API_KEY=<your_groq_api_key> \
|
| 891 |
+
pranavreddy123/bigbasket-assistant:latest
|
| 892 |
+
```
|
| 893 |
+
|
| 894 |
+
🌐 3. Access the App
|
| 895 |
+
|
| 896 |
+
```bash
|
| 897 |
+
http://localhost:8000
|
| 898 |
+
```
|
| 899 |
+
|
| 900 |
+
---
|
| 901 |
+
|
| 902 |
+
🤖 Example Usage
|
| 903 |
+
|
| 904 |
+
Query: "Which is the cheapest hair product with high rating?"
|
| 905 |
+
Rewritten: "Find the most affordable hair care product with a high customer rating."
|
| 906 |
+
Response: "Garlic Oil - Vegetarian Capsule 500 mg by Sri Sri Ayurveda is available at ₹220 with a 4.1 rating."
|
| 907 |
+
|
| 908 |
+
---
|
| 909 |
+
|
| 910 |
+
## 🛠️ GitHub Actions (CI/CD)
|
| 911 |
+
|
| 912 |
+
File: .github/workflows/ci-cd.yml
|
| 913 |
+
|
| 914 |
+
✅ CI-Test: Runs unit tests using pytest.
|
| 915 |
+
|
| 916 |
+
🐳 CD-Docker: Builds Docker image and pushes to DockerHub.
|
| 917 |
+
|
| 918 |
+
Triggered on push to main or pull request.
|
| 919 |
+
|
| 920 |
+
---
|
| 921 |
+
|
| 922 |
+
## ☁️ Deployment on Amazon EC2
|
| 923 |
+
|
| 924 |
+
### 1. Launch EC2 Instance (Ubuntu 20.04)
|
| 925 |
+
|
| 926 |
+
### 2. SSH into your instance
|
| 927 |
+
|
| 928 |
+
```bash
|
| 929 |
+
ssh -i "your-key.pem" ubuntu@your-ec2-ip
|
| 930 |
+
```
|
| 931 |
+
|
| 932 |
+
### 3. Install Docker
|
| 933 |
+
|
| 934 |
+
```bash
|
| 935 |
+
sudo apt update
|
| 936 |
+
sudo apt install docker.io -y
|
| 937 |
+
sudo systemctl start docker
|
| 938 |
+
sudo systemctl enable docker
|
| 939 |
+
```
|
| 940 |
+
|
| 941 |
+
### 4. Pull and Run Docker Image
|
| 942 |
+
|
| 943 |
+
```bash
|
| 944 |
+
docker pull pranavreddy123/bigbasket-assistant:latest
|
| 945 |
+
# Ensure your .env file is in the same directory, or create an API key using Groq Cloud and add it to the .env file
|
| 946 |
+
docker run -d --env-file .env -p 8000:8000 pranavreddy123/bigbasket-assistant:latest
|
| 947 |
+
```
|
| 948 |
+
|
| 949 |
+
## Access your app via `http://<your-ec2-public-ip>:8000`
|
| 950 |
+
|
| 951 |
+
## 🧠 Tech Stack
|
| 952 |
+
|
| 953 |
+
✅ LLMs: Groq (gemma2-9b-it, llama3-70b-8192)
|
| 954 |
+
|
| 955 |
+
✅ LangChain, FAISS, HuggingFace, CrossEncoder
|
| 956 |
+
|
| 957 |
+
✅ FastAPI
|
| 958 |
+
|
| 959 |
+
✅ Docker
|
| 960 |
+
|
| 961 |
+
✅ GitHub Actions
|
| 962 |
+
|
| 963 |
+
✅ AWS EC2
|
| 964 |
+
|
| 965 |
+
✅ HTML/CSS
|
| 966 |
+
|
| 967 |
+
---
|
| 968 |
+
|
| 969 |
+
## 🔗 Links
|
| 970 |
+
|
| 971 |
+
🔍 GitHub Repo: BigBasket-SmartCart-AI-Assistant-for-BigBasket-Shopping
|
| 972 |
+
|
| 973 |
+
🐳 DockerHub: pranavreddy123/bigbasket-assistant
|
| 974 |
+
|
| 975 |
+
---
|
| 976 |
+
|
| 977 |
+
## 🧑💻 Developed By
|
| 978 |
+
|
| 979 |
+
Pranav Reddy
|
| 980 |
+
|
| 981 |
+
|
| 982 |
+
2. **Netflix Customer Churn Prediction – End-to-End ML System** *(Personal Project)* \[GitHub]
|
| 983 |
+
|
| 984 |
+
* Developed a complete machine learning pipeline to predict customer churn, achieving 99% recall and 0.99 ROC AUC through feature engineering, hyperparameter tuning, and cross-validation.
|
| 985 |
+
* Performed in-depth EDA to identify key churn drivers such as low engagement, infrequent logins, and payment methods, improving model interpretability and business insights.
|
| 986 |
+
* Implemented reproducible MLOps workflows with data versioning using DVC and AWS S3, and tracked experiments, metrics, and model artifacts using MLflow.
|
| 987 |
+
* Designed and deployed a FastAPI-based REST API with HTML/CSS frontend for real-time predictions, containerized the application using Docker, and automated CI/CD using GitHub Actions for deployment on AWS EC2.
|
| 988 |
+
|
| 989 |
+
📊 Netflix Customer Churn Prediction
|
| 990 |
+
# 📊 Netflix Customer Churn Prediction
|
| 991 |
+
|
| 992 |
+
# Project Overview
|
| 993 |
+
|
| 994 |
+
Netflix, like many subscription-based platforms, faces the challenge of customer churn. Retaining existing customers is significantly more cost-effective than acquiring new ones. This project delivers a full-scale machine learning solution to predict customer churn using behavioral and subscription data, from ingestion to deployment via a FastAPI interface.
|
| 995 |
+
|
| 996 |
+
This repository presents a production-grade, explainable, and reproducible ML pipeline with CI/CD, experiment tracking (**MLflow**), data versioning (**DVC**), and containerized deployment using **Docker**.
|
| 997 |
+
|
| 998 |
+
---
|
| 999 |
+
|
| 1000 |
+
# Problem Statement
|
| 1001 |
+
|
| 1002 |
+
Netflix seeks to proactively identify users likely to cancel their subscriptions. Predicting churn enables targeted interventions to retain users and minimize revenue loss.
|
| 1003 |
+
|
| 1004 |
+
> **Goal:** Build an ML classification model that predicts churn based on customer behavior and plan details.
|
| 1005 |
+
|
| 1006 |
+
---
|
| 1007 |
+
|
| 1008 |
+
## Key Features Used
|
| 1009 |
+
|
| 1010 |
+
| Feature | Type | Description |
|
| 1011 |
+
| -------------------------- | ----------- | ---------------------------------------------- |
|
| 1012 |
+
| watch\_hours | Numerical | Total hours watched |
|
| 1013 |
+
| last\_login\_days | Numerical | Days since last login |
|
| 1014 |
+
| number\_of\_profiles | Numerical | Total profiles under the account |
|
| 1015 |
+
| avg\_watch\_time\_per\_day | Numerical | Daily average watch time |
|
| 1016 |
+
| subscription\_type | Categorical | Subscription level: Basic, Standard, Premium |
|
| 1017 |
+
| payment\_method | Categorical | Payment method: Credit Card, UPI, PayPal, etc. |
|
| 1018 |
+
| churned | Target | 1 = Churned, 0 = Not churned |
|
| 1019 |
+
|
| 1020 |
+
---
|
| 1021 |
+
|
| 1022 |
+
## Key EDA Insights
|
| 1023 |
+
|
| 1024 |
+
### 🔬 Feature Significance
|
| 1025 |
+
|
| 1026 |
+
| Feature | Test | p-value | Significant? |
|
| 1027 |
+
| -------------------------- | -------------- | ------- | ------------ |
|
| 1028 |
+
| subscription\_type | Chi-Square | 0.0000 | ✅ Yes |
|
| 1029 |
+
| payment\_method | Chi-Square | 0.0000 | ✅ Yes |
|
| 1030 |
+
| number\_of\_profiles | Chi-Square | 0.0000 | ✅ Yes |
|
| 1031 |
+
| watch\_hours | Mann-Whitney U | 0.0000 | ✅ Yes |
|
| 1032 |
+
| last\_login\_days | Mann-Whitney U | 0.0000 | ✅ Yes |
|
| 1033 |
+
| avg\_watch\_time\_per\_day | Mann-Whitney U | 0.0000 | ✅ Yes |
|
| 1034 |
+
| age | Mann-Whitney U | 0.7803 | ❌ No |
|
| 1035 |
+
| gender, region, device | Chi-Square | > 0.3 | ❌ No |
|
| 1036 |
+
|
| 1037 |
+
> ✅ These statistically significant features were included in the final model pipeline.
|
| 1038 |
+
|
| 1039 |
+
---
|
| 1040 |
+
|
| 1041 |
+
## Project Architecture
|
| 1042 |
+
|
| 1043 |
+
```bash
|
| 1044 |
+
netflix-churn-prediction/
|
| 1045 |
+
├── data/ # Raw and processed data
|
| 1046 |
+
├── models/ # Trained model binaries
|
| 1047 |
+
├── reports/ # Classification reports & plots
|
| 1048 |
+
├── static/ # CSS
|
| 1049 |
+
├── templates/ # HTML UI
|
| 1050 |
+
├── src/
|
| 1051 |
+
│ ├── data_ingestion.py # Load dataset
|
| 1052 |
+
│ ├── data_preprocessing.py # Pipeline for scaling & encoding
|
| 1053 |
+
│ └── model_training.py # ML training & evaluation
|
| 1054 |
+
├── main.py # FastAPI backend
|
| 1055 |
+
├── Dockerfile # Containerization
|
| 1056 |
+
├── .dvc/ # DVC for data version control
|
| 1057 |
+
├── .github/workflows/ # CI/CD GitHub Actions
|
| 1058 |
+
└── README.md
|
| 1059 |
+
```
|
| 1060 |
+
|
| 1061 |
+
---
|
| 1062 |
+
|
| 1063 |
+
## ⚙️ End-to-End ML Workflow
|
| 1064 |
+
|
| 1065 |
+
### 1️⃣ Data Ingestion
|
| 1066 |
+
|
| 1067 |
+
* Loads `.csv` into DataFrame
|
| 1068 |
+
* Handles errors and logs shape/summary
|
| 1069 |
+
|
| 1070 |
+
### 2️⃣ Preprocessing
|
| 1071 |
+
|
| 1072 |
+
* OneHotEncoding (categorical)
|
| 1073 |
+
* StandardScaler (numerical)
|
| 1074 |
+
* Uses `ColumnTransformer` for pipeline modularity
|
| 1075 |
+
|
| 1076 |
+
### 3️⃣ Model Training
|
| 1077 |
+
|
| 1078 |
+
* Models: `RandomForest`, `GradientBoosting`, `SVC`
|
| 1079 |
+
* `GridSearchCV` for hyperparameter tuning
|
| 1080 |
+
* Model artifacts saved to `models/`
|
| 1081 |
+
* ROC curves + classification reports saved to `reports/`
|
| 1082 |
+
|
| 1083 |
+
### 4️⃣ MLflow Tracking ✅
|
| 1084 |
+
|
| 1085 |
+
* Tracks experiment metadata, metrics, parameters
|
| 1086 |
+
* Stores models and artifacts
|
| 1087 |
+
* UI accessible at `localhost:5000`
|
| 1088 |
+
|
| 1089 |
+
---
|
| 1090 |
+
|
| 1091 |
+
## 🧪 Model Performance
|
| 1092 |
+
|
| 1093 |
+
| Model | Accuracy | F1 Score | ROC AUC (Test) | ROC AUC (CV) | Notes |
|
| 1094 |
+
| ----------------- | -------- | -------- | -------------- | ------------ | ----------------------------- |
|
| 1095 |
+
| Random Forest | 0.99 | 0.99 | **0.9995** | 0.9987 | ✅ Best overall |
|
| 1096 |
+
| Gradient Boosting | 0.99 | 0.99 | 0.9989 | 0.9991 | Robust & efficient |
|
| 1097 |
+
| SVC | 0.93 | 0.93 | 0.9844 | 0.9822 | Lightweight |
|
| 1098 |
+
|
| 1099 |
+
---
|
| 1100 |
+
|
| 1101 |
+
## 🌐 FastAPI Deployment
|
| 1102 |
+
|
| 1103 |
+
### 🔧 API Endpoints:
|
| 1104 |
+
|
| 1105 |
+
* `/`: HTML frontend form for manual input
|
| 1106 |
+
* `/api/predict`: JSON-based API for programmatic inference
|
| 1107 |
+
|
| 1108 |
+
### 🔌 Model Used:
|
| 1109 |
+
|
| 1110 |
+
* Random Forest (best AUC + accuracy)
|
| 1111 |
+
* Accepts form or JSON input
|
| 1112 |
+
* Returns churn prediction + confidence
|
| 1113 |
+
|
| 1114 |
+
---
|
| 1115 |
+
|
| 1116 |
+
## 🐳 Docker Setup
|
| 1117 |
+
|
| 1118 |
+
```Dockerfile
|
| 1119 |
+
FROM python:3.10-slim
|
| 1120 |
+
WORKDIR /app
|
| 1121 |
+
COPY requirements.txt .
|
| 1122 |
+
RUN pip install -r requirements.txt
|
| 1123 |
+
COPY . .
|
| 1124 |
+
EXPOSE 8000
|
| 1125 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
| 1126 |
+
```
|
| 1127 |
+
|
| 1128 |
+
Run locally:
|
| 1129 |
+
|
| 1130 |
+
```bash
|
| 1131 |
+
docker build -t netflix-churn .
|
| 1132 |
+
docker run -p 8000:8000 netflix-churn
|
| 1133 |
+
```
|
| 1134 |
+
|
| 1135 |
+
---
|
| 1136 |
+
|
| 1137 |
+
## 🔁 CI/CD Pipeline (GitHub Actions)
|
| 1138 |
+
|
| 1139 |
+
### ✅ Stages:
|
| 1140 |
+
|
| 1141 |
+
1. **Test Phase**
|
| 1142 |
+
|
| 1143 |
+
* Install dependencies
|
| 1144 |
+
* Run `pytest` on unit tests
|
| 1145 |
+
* Pull versioned data using `dvc pull`
|
| 1146 |
+
|
| 1147 |
+
2. **Build Phase**
|
| 1148 |
+
|
| 1149 |
+
* Docker image build with `CACHEBUST` arg
|
| 1150 |
+
* Push to DockerHub using GitHub Secrets
|
| 1151 |
+
|
| 1152 |
+
3. **Deploy Phase**
|
| 1153 |
+
|
| 1154 |
+
* SSH into EC2 instance
|
| 1155 |
+
* Stop, remove old container
|
| 1156 |
+
* Pull and launch updated Docker image
|
| 1157 |
+
|
| 1158 |
+
### 🔐 GitHub Repository Secrets
|
| 1159 |
+
|
| 1160 |
+
| Name | Purpose |
|
| 1161 |
+
| ----------------------- | ---------------------------------- |
|
| 1162 |
+
| `AWS_ACCESS_KEY_ID` | AWS auth for DVC S3 |
|
| 1163 |
+
| `AWS_SECRET_ACCESS_KEY` | AWS auth for DVC S3 |
|
| 1164 |
+
| `DOCKER_USERNAME` | DockerHub username for push |
|
| 1165 |
+
| `DOCKER_PASSWORD` | DockerHub password/token |
|
| 1166 |
+
| `EC2_HOST` | Public IP/DNS of EC2 instance |
|
| 1167 |
+
| `EC2_USER` | SSH user for EC2 login |
|
| 1168 |
+
| `EC2_SSH_KEY` | Private SSH key for GitHub Actions |
|
| 1169 |
+
|
| 1170 |
+
---
|
| 1171 |
+
|
| 1172 |
+
## 🧬 Data Versioning with DVC
|
| 1173 |
+
|
| 1174 |
+
* Tracks raw and preprocessed data versions
|
| 1175 |
+
* Uses `.dvc/config` to connect to **AWS S3** remote
|
| 1176 |
+
* Run `dvc push` and `dvc pull` to sync across environments
|
| 1177 |
+
* Ensures reproducibility in CI and local experiments
|
| 1178 |
+
|
| 1179 |
+
---
|
| 1180 |
+
|
| 1181 |
+
## 📌 Business Value & Insights
|
| 1182 |
+
|
| 1183 |
+
* 🧠 **High-risk churn users** are linked to:
|
| 1184 |
+
|
| 1185 |
+
* Low engagement (low watch hours)
|
| 1186 |
+
* Infrequent logins
|
| 1187 |
+
* Basic plans & non-card payments
|
| 1188 |
+
|
| 1189 |
+
* 📈 **Operational Benefits**:
|
| 1190 |
+
|
| 1191 |
+
* Preemptive retention campaigns
|
| 1192 |
+
* Personalized offers to vulnerable users
|
| 1193 |
+
* Reduce marketing costs via targeted outreach
|
| 1194 |
+
|
| 1195 |
+
---
|
| 1196 |
+
|
| 1197 |
+
## ✅ Run Locally (No Docker)
|
| 1198 |
+
|
| 1199 |
+
```bash
|
| 1200 |
+
git clone <repo_url>
|
| 1201 |
+
cd netflix-churn-prediction
|
| 1202 |
+
python src/model_training.py # Train all models
|
| 1203 |
+
uvicorn main:app --reload # Launch API server
|
| 1204 |
+
```
|
| 1205 |
+
|
| 1206 |
+
---
|
| 1207 |
+
## Summary
|
| 1208 |
+
|
| 1209 |
+
| **Component** | **Implemented** | **Tool/Service Used** |
|
| 1210 |
+
| ------------------------ | --------------- | ----------------------------------------- |
|
| 1211 |
+
| **Data Versioning** | ✅ | `DVC` with `AWS S3` remote |
|
| 1212 |
+
| **Data Ingestion** | ✅ | `pandas`, custom Python class |
|
| 1213 |
+
| **Data Preprocessing** | ✅ | `scikit-learn` Pipelines |
|
| 1214 |
+
| **Model Training** | ✅ | `scikit-learn`, `GridSearchCV` |
|
| 1215 |
+
| **Experiment Tracking** | ✅ | `MLflow` (local server: `127.0.0.1:5000`) |
|
| 1216 |
+
| **Model Evaluation** | ✅ | `classification_report`, ROC AUC |
|
| 1217 |
+
| **Model Packaging** | ✅ | `joblib` for serialization |
|
| 1218 |
+
| **API Deployment** | ✅ | `FastAPI` on `AWS EC2` |
|
| 1219 |
+
| **Web UI** | ✅ | HTML + Bootstrap via Jinja2 |
|
| 1220 |
+
| **Containerization** | ✅ | `Docker` (with `Dockerfile`) |
|
| 1221 |
+
| **CI/CD Pipeline** | ✅ | `GitHub Actions` |
|
| 1222 |
+
| **Cloud Hosting** | ✅ | `AWS EC2`, SSH-based deployment |
|
| 1223 |
+
| **Secrets Management** | ✅ | `GitHub Secrets` |
|
| 1224 |
+
| **Testing** | ✅ | `pytest`, CI-tested |
|
| 1225 |
+
|
| 1226 |
+
---
|
| 1227 |
+
## 🙌 Author
|
| 1228 |
+
|
| 1229 |
+
* 👨💻 Katta Sai Pranav Reddy
|
| 1230 |
+
|
| 1231 |
+
---
|
| 1232 |
+
## 🔗 Links
|
| 1233 |
+
|
| 1234 |
+
🔍 GitHub Repo: Netflix-Customer-Churn-Prediction-Using-Machine-Learning
|
| 1235 |
+
|
| 1236 |
+
🐳 DockerHub: pranavreddy123/netflix-churn-prediction
|
| 1237 |
+
|
| 1238 |
+
## 📎 Tech Stack
|
| 1239 |
+
|
| 1240 |
+
* **Python 3.10**
|
| 1241 |
+
* **Scikit-learn**, **MLflow**, **DVC**, **FastAPI**, **Docker**
|
| 1242 |
+
* **GitHub Actions**, **AWS EC2**, **S3 Remote Storage**
|
| 1243 |
+
---
|
| 1244 |
+
|
| 1245 |
+
### **Skills**
|
| 1246 |
+
|
| 1247 |
+
* **Tools:** MLflow, DVC, Docker, Git, GitHub Actions, AWS (EC2, S3, ECR), FAISS, Pinecone, Hugging Face, LangChain, LangSmith, FastAPI
|
| 1248 |
+
* **Programming & Technical Skills:** Python, SQL, HTML, CSS, Scikit-learn, TensorFlow, Keras, Statistics
|
| 1249 |
+
* **Data Science & Machine Learning:** Data Preprocessing, EDA, Feature Engineering, Model Training & Evaluation, Hyperparameter Tuning, Clustering, MLOps, Semantic Search, Retrieval-Augmented Generation (RAG), CNN, RNN, GPT, Transformers, Fine-Tuning, Prompt Engineering
|
| 1250 |
+
* **Data Visualization & Analysis:** Pandas, NumPy, Matplotlib, Seaborn
|
| 1251 |
+
|
| 1252 |
+
---
|
| 1253 |
+
|
| 1254 |
+
hobbies section
|
| 1255 |
+
|
| 1256 |
+
---
|
| 1257 |
+
|
| 1258 |
+
### **Hobbies & Interests**
|
| 1259 |
+
Hobbies & Interests
|
| 1260 |
+
|
| 1261 |
+
* Playing Cricket
|
| 1262 |
+
* Watching Football
|
| 1263 |
+
* Reading Books
|
| 1264 |
+
* Exploring Latest Advancements in Artificial Intelligence
|
| 1265 |
+
* Browsing the Internet for Tech & Knowledge Updates
|
| 1266 |
+
|
| 1267 |
+
---
|
| 1268 |
+
|
| 1269 |
+
### Contact Information
|
| 1270 |
+
|
| 1271 |
+
Contact Information
|
| 1272 |
+
|
| 1273 |
+
📞 Phone: +91 93475 41040
|
| 1274 |
+
📧 Email: [kattapranavreddy@gmail.com](mailto:kattapranavreddy@gmail.com)
|
| 1275 |
+
💻 GitHub: [github.com/ka1817](https://github.com/ka1817)
|
| 1276 |
+
🔗 LinkedIn: [linkedin.com/in/pranav-reddy-katta](https://www.linkedin.com/in/pranav-reddy-katta/)
|
| 1277 |
+
---
|
| 1278 |
+
|
| 1279 |
+
---
|
| 1280 |
+
Certifications By Pranav Reddy
|
| 1281 |
+
Certifications:
|
| 1282 |
+
(All from Udemy)
|
| 1283 |
+
- Python for Data Science and Machine Learning
|
| 1284 |
+
- The Complete SQL Bootcamp
|
| 1285 |
+
- Generative AI with LangChain and HuggingFace
|
| 1286 |
+
- End-To-End MLOps Bootcamp
|
| 1287 |
+
|
| 1288 |
+
---
|
| 1289 |
+
|
local_faiss_index/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c55f659ccefaca4d036e0cea1353effc17f22e7a080035c43c30739386c6806
|
| 3 |
+
size 101421
|
local_faiss_index/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98d51e77c2b495b1ec0b617fd15f33d955debb37bbdd2c5e14a897ac348f3a7e
|
| 3 |
+
size 65378
|
main.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
from contextlib import asynccontextmanager
from src.retrival_generation import RetrievalGeneration
import uvicorn

# Request schema for POST /predict: a single free-text question.
class QueryRequest(BaseModel):
    query: str

# Module-level singleton; heavy work (embedding model + FAISS load) is
# deferred to the lifespan hook so import stays cheap.
retriever = RetrievalGeneration(vectorstore_path="local_faiss_index")

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Load the persisted FAISS index (rebuild=False) and assemble the RAG
    # chain once, before the app starts serving requests.
    retriever.init_vectorstore(rebuild=False)
    retriever.build_rag_chain(k=10, top_n=5)
    yield

app = FastAPI(lifespan=lifespan)

# Static assets (style.css) served under /static.
app.mount("/static", StaticFiles(directory="static"), name="static")

templates = Jinja2Templates(directory="templates")

@app.get("/", response_class=HTMLResponse)
def home(request: Request):
    # Serve the chatbot UI page.
    return templates.TemplateResponse("index.html", {"request": request})

@app.post("/predict")
def predict(request: QueryRequest):
    # Sync endpoint: FastAPI runs it in a threadpool, so the blocking chain
    # invocation does not stall the event loop.
    response = retriever.rag_chain.invoke(request.query)
    return {"response": response}

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=4000, reload=True)
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain
|
| 2 |
+
langchain-groq
|
| 3 |
+
langchain-community
|
| 4 |
+
pypdf
|
| 5 |
+
python-dotenv
|
| 6 |
+
fastapi
|
| 7 |
+
sentence-transformers
|
| 8 |
+
faiss-cpu
|
| 9 |
+
uvicorn
|
src/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
# Re-export the pipeline modules' public names so callers can write e.g.
# `from src import DataIngestion, DataSplitting, RetrievalGeneration`.
from .data_ingestion import *
from .data_preprocessing import *
from .retrival_generation import *
src/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (203 Bytes). View file
|
|
|
src/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (278 Bytes). View file
|
|
|
src/__pycache__/data_ingestion.cpython-310.pyc
ADDED
|
Binary file (1.48 kB). View file
|
|
|
src/__pycache__/data_ingestion.cpython-311.pyc
ADDED
|
Binary file (2.79 kB). View file
|
|
|
src/__pycache__/data_preprocessing.cpython-310.pyc
ADDED
|
Binary file (1.48 kB). View file
|
|
|
src/__pycache__/data_preprocessing.cpython-311.pyc
ADDED
|
Binary file (2.41 kB). View file
|
|
|
src/__pycache__/evaluation.cpython-310.pyc
ADDED
|
Binary file (4.29 kB). View file
|
|
|
src/__pycache__/retrival_generation.cpython-310.pyc
ADDED
|
Binary file (4.22 kB). View file
|
|
|
src/__pycache__/retrival_generation.cpython-311.pyc
ADDED
|
Binary file (6.46 kB). View file
|
|
|
src/data_ingestion.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import logging
from langchain_community.document_loaders import TextLoader

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
)
logger = logging.getLogger(__name__)


class DataIngestion:
    """Loads the raw profile text file that feeds the RAG pipeline."""

    def __init__(self, path: str | None = None):
        """Resolve the source file path.

        path: optional explicit path to a text file; when omitted, defaults
        to ``<repo>/data/info.txt`` resolved relative to this module.
        """
        if path is None:
            default_path = os.path.join(
                os.path.dirname(__file__), "..", "data", "info.txt"
            )
            self.path = os.path.abspath(default_path)
            logger.info(f"No path provided. Using default file: {self.path}")
        else:
            self.path = os.path.abspath(path)
            logger.info(f"Using custom file path: {self.path}")

    def load_data(self):
        """Load the file as LangChain documents.

        Returns: the list produced by ``TextLoader.load()`` (a single
        Document for a plain text file).
        Raises: FileNotFoundError if the resolved path does not exist.
        """
        logger.debug(f"Checking if file exists at: {self.path}")
        if not os.path.exists(self.path):
            logger.error(f"File not found at {self.path}")
            raise FileNotFoundError(f"File not found: {self.path}")

        logger.info(f"Loading file: {self.path}")
        loader = TextLoader(self.path, encoding="utf-8")
        docs = loader.load()
        logger.info(f"Loaded {len(docs)} documents from {self.path}")
        return docs
src/data_preprocessing.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import logging
from langchain.text_splitter import RecursiveCharacterTextSplitter
from src.data_ingestion import DataIngestion

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
)
logger = logging.getLogger(__name__)


class DataSplitting:
    """Splits the ingested profile document into overlapping text chunks."""

    def __init__(self, chunk_size: int = 40, chunk_overlap: int = 20):
        # NOTE(review): the defaults (40 chars with 20 overlap) produce very
        # small chunks; the retrieval pipeline overrides them with 2000/800.
        # Confirm the defaults are intentional.
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        logger.info(
            f"Initialized DataSplitting with chunk_size={chunk_size}, chunk_overlap={chunk_overlap}"
        )

    def chunking(self):
        """Ingest the default data file and split it into chunks.

        Always ingests via ``DataIngestion()`` with its default path — a
        custom path cannot be supplied through this method.
        Returns: list of chunked Document objects.
        """
        logger.info("Starting document ingestion before splitting...")
        data = DataIngestion()
        docs = data.load_data()
        logger.info(f"Received {len(docs)} documents for splitting.")

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )
        logger.debug("Splitter initialized. Splitting documents...")
        chunks = splitter.split_documents(docs)

        logger.info(f"Created {len(chunks)} chunks from {len(docs)} documents.")
        return chunks
src/evaluation.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
from dotenv import load_dotenv
from datasets import Dataset

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors.cross_encoder_rerank import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

from langchain_groq import ChatGroq
from ragas import evaluate
from ragas.metrics import context_precision, context_recall, faithfulness, answer_relevancy
from ragas.run_config import RunConfig

from src.retrival_generation import RetrievalGeneration


class Evaluation:
    """RAGAS-based evaluation harness for the portfolio RAG chain."""

    def __init__(self, vectorstore_path: str, llm_model: str = "llama-3.3-70b-versatile"):
        """Load the FAISS index, the judge LLM, and build the RAG chain.

        vectorstore_path: directory holding the persisted FAISS index.
        llm_model: Groq model used as the RAGAS judge (not the answer model).
        """
        load_dotenv()
        self.groq_api_key = os.getenv("GROQ_API_KEY")

        # Judge LLM used by RAGAS metrics.
        self.llm = ChatGroq(api_key=self.groq_api_key, model=llm_model)

        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

        self.vectorstore_path = vectorstore_path
        self.vectorstore = FAISS.load_local(
            self.vectorstore_path,
            self.embeddings,
            allow_dangerous_deserialization=True
        )

        # The answer-generating chain (always built with its own reranking
        # retriever — see build_rag_chain).
        self.rg = RetrievalGeneration(vectorstore_path=vectorstore_path)
        self.rg.init_vectorstore()
        self.qa = self.rg.build_rag_chain()

    def run(self, questions: list, ground_truth: list, use_reranker: bool = False):
        """Run evaluation with or without reranking"""
        # NOTE(review): `use_reranker` only changes the retriever used to
        # collect the `contexts` column below; answers always come from
        # self.qa, whose internal retriever reranks regardless. Confirm this
        # is the intended comparison.

        if use_reranker:
            cross_encoder_model = HuggingFaceCrossEncoder(model_name="cross-encoder/ms-marco-MiniLM-L-6-v2")
            compressor = CrossEncoderReranker(model=cross_encoder_model, top_n=3)
            retriever = ContextualCompressionRetriever(
                base_compressor=compressor,
                base_retriever=self.vectorstore.as_retriever(search_kwargs={"k": 10})
            )
        else:
            retriever = self.vectorstore.as_retriever(search_kwargs={"k": 10})

        answers, contexts = [], []
        for query in questions:
            answers.append(self.qa.invoke(query))
            # NOTE(review): get_relevant_documents is deprecated in newer
            # LangChain releases in favour of retriever.invoke(query).
            contexts.append([doc.page_content for doc in retriever.get_relevant_documents(query)])

        # RAGAS expects these exact column names.
        data = {
            "question": questions,
            "ground_truth": ground_truth,
            "answer": answers,
            "contexts": contexts
        }

        dataset = Dataset.from_dict(data)

        # Conservative run config: serial workers with long timeouts to stay
        # within Groq rate limits.
        run_config = RunConfig(
            timeout=290,
            max_retries=5,
            max_wait=30,
            max_workers=1
        )

        result = evaluate(
            dataset=dataset,
            metrics=[context_precision, context_recall, faithfulness, answer_relevancy],
            llm=self.llm,
            embeddings=self.embeddings,
            run_config=run_config,
            batch_size=1
        )

        return result
if __name__ == "__main__":
    base_dir = os.path.dirname(os.path.abspath(__file__))
    vectorstore_path = os.path.join(base_dir, "..", "local_faiss_index")

    evaluation = Evaluation(vectorstore_path)

    questions = [
        "What were Katta Sai Pranav Reddy’s 10th class marks and CGPA?",
        "What subjects did Pranav Reddy study in 12th (Intermediate) and what were his marks?",
        "Can you summarize Pranav Reddy’s professional and project experience?"
    ]

    ground_truth = [
        "Katta Sai Pranav Reddy completed his SSC in March 2019 at Ekalavya Foundation School, Nalgonda, securing A1 grades in most subjects and a B1 in Hindi, with an overall CGPA of 9.5.",
        "In March 2021, Pranav Reddy finished his Intermediate education, achieving nearly full marks in English, Sanskrit, HE, and optional subjects like Mathematics, Physics, and Chemistry, with a total of 982 marks.",
        "Pranav Reddy is an AI and ML engineer with internship experience at iNeuron Intelligence and Unified Mentor, where he worked on customer segmentation and attrition prediction. His projects include the BigBasket SmartCart AI Assistant and Netflix Churn Prediction, showcasing skills in Python, ML pipelines, FAISS, FastAPI, and Generative AI solutions."
    ]

    # Run without reranker
    print("🔹 Baseline Evaluation (no reranker)")
    baseline_result = evaluation.run(questions, ground_truth, use_reranker=False)
    print(baseline_result)

    # Run with reranker
    print("\n🔹 Evaluation with Reranker")
    rerank_result = evaluation.run(questions, ground_truth, use_reranker=True)
    print(rerank_result)
src/retrival_generation.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import logging
import warnings
from dotenv import load_dotenv
from src.data_preprocessing import DataSplitting
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors.cross_encoder_rerank import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

warnings.filterwarnings("ignore")

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
    handlers=[logging.StreamHandler()]
)


logger = logging.getLogger("RetrievalGeneration")

load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY not found in environment variables.")

# Answer-generating LLM; model overridable via GROQ_MODEL. Instantiated at
# import time, so importing this module requires a valid GROQ_API_KEY.
llm = ChatGroq(model=os.getenv("GROQ_MODEL", "llama-3.1-8b-instant"))


class RetrievalGeneration:
    """Builds and owns the FAISS-backed, reranked RAG answering chain."""

    def __init__(self, vectorstore_path: str = "faiss_store"):
        # Chain and store are created lazily via init_vectorstore() /
        # build_rag_chain().
        self.vectorstore_path = vectorstore_path
        self.vectorstore = None
        self.rag_chain = None
        logger.info("RetrievalGeneration initialized with path: %s", vectorstore_path)

    def init_vectorstore(self, rebuild: bool = False):
        """Load the persisted FAISS index, or build and persist a new one.

        rebuild: force re-chunking and re-embedding even if an index exists.
        Returns: the loaded/created FAISS vectorstore.
        """
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
        logger.info("Embeddings model loaded.")

        if os.path.exists(self.vectorstore_path) and not rebuild:
            logger.info("Loading existing FAISS index from: %s", self.vectorstore_path)
            self.vectorstore = FAISS.load_local(
                self.vectorstore_path, embeddings, allow_dangerous_deserialization=True
            )
        else:
            logger.warning("Building new FAISS index...")
            chunks = DataSplitting(chunk_size=2000, chunk_overlap=800).chunking()
            logger.info("Data split into %d chunks", len(chunks))
            self.vectorstore = FAISS.from_documents(chunks, embeddings)
            self.vectorstore.save_local(self.vectorstore_path)
            logger.info("FAISS index saved at: %s", self.vectorstore_path)

        return self.vectorstore

    def build_rag_chain(self, k: int = 10,top_n: int = 5):
        """Assemble retriever -> reranker -> prompt -> LLM -> string chain.

        k: candidates fetched from FAISS; top_n: documents kept after
        cross-encoder reranking.
        Returns: the runnable chain (also stored on self.rag_chain).
        Raises: ValueError if init_vectorstore() has not been called.
        """
        if not self.vectorstore:
            raise ValueError("Vectorstore not initialized. Run init_vectorstore() first.")

        logger.info("Creating retriever from FAISS vectorstore (top_k=%d)...", k)
        cross_encoder_model = HuggingFaceCrossEncoder(model_name="cross-encoder/ms-marco-MiniLM-L-6-v2")

        compressor = CrossEncoderReranker(model=cross_encoder_model, top_n=top_n)

        retriever = ContextualCompressionRetriever(base_compressor=compressor,base_retriever=self.vectorstore.as_retriever(search_kwargs={"k": k}))


        prompt = PromptTemplate(
            template="""
You are a professional and concise AI assistant that answers questions
about the career, education, skills, projects, certifications, and professional
background of **Katta Sai Pranav Reddy**.

Your job is to:
- Use ONLY the provided context to answer.
- Be recruiter-friendly: structured, clear, and professional in tone.
- If the question is unrelated to Katta Sai Pranav Reddy’s professional profile,
politely decline by saying:
"I can only answer questions related to the professional background of Katta Sai Pranav Reddy."
- If the context does not provide enough information, say:
"The available information does not cover that detail."

Context:
{context}

Question:
{question}

Answer (clear, structured, recruiter-focused):
""",
            input_variables=["context", "question"]
        )

        # Fan-in: retriever supplies {context}, the raw query passes through
        # as {question}; the parser flattens the LLM message to a string.
        self.rag_chain = (
            RunnableParallel({
                "context": retriever,
                "question": RunnablePassthrough()
            })
            | prompt
            | llm
            | StrOutputParser()
        )

        logger.info("RAG chain with reranking successfully built.")
        return self.rag_chain
static/style.css
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Chatbot UI styles: base layout, desktop card, mobile full-screen layout. */

body {
  font-family: Arial, sans-serif;
  background: #f3f6fa;
  margin: 0;
  height: 100vh;
  width: 100vw;
  overflow: hidden;
}

/* Desktop: center a fixed-size chat card. */
@media (min-width: 769px) {
  body {
    display: flex;
    justify-content: center;
    align-items: center;
  }

  .chatbot-container {
    width: 650px;
    height: 580px;
    border-radius: 12px;
    box-shadow: 0 8px 20px rgba(0,0,0,0.15);
  }
}

/* Mobile: pin the chat full-screen. */
@media (max-width: 700px) {
  body {
    display: block;
    width: 100vw;
    height: 100vh;
    margin: 0;
    padding: 0;
  }

  .chatbot-container {
    width: 100vw;
    height: 94vh;
    border-radius: 0;
    box-shadow: none;
    display: flex;
    flex-direction: column;
    position: fixed;
    top: 0;
    left: 0;
    padding-bottom: env(safe-area-inset-bottom);
    overflow: hidden;
  }

  .chat-header h2 {
    font-size: 20px;
  }

  .chat-body {
    font-size: 16px;
    padding: 12px;
    overflow-y: auto;
    flex: 1 1 auto;
    min-height: 0;
  }

  #user-input {
    font-size: 16px;
    padding: 12px;
  }

  #send-btn {
    font-size: 16px;
    padding: 12px 16px;
  }

  .quick-options {
    display: flex;
    flex-wrap: wrap;
    justify-content: space-between;
    padding: 10px;
    background: #f4f4f9;
    border-top: 1px solid #ddd;
    position: sticky;
    bottom: 0;
    left: 0;
    right: 0;
    max-height: 45vh;
    overflow-y: auto;
    flex-shrink: 0;
  }

  .option-btn {
    flex: 1 1 48%;
    font-size: 16px;
    margin: 6px 1%;
    padding: 14px;
    white-space: normal;
    word-break: break-word;
    text-align: center;
    box-sizing: border-box;
  }
}

/* iOS Safari: prefer the small-viewport unit (svh) to avoid the URL bar
   covering the input. FIX: the vh fallback must come FIRST — the original
   order (svh then vh) meant vh always overrode svh on browsers that
   support both, so svh never took effect. */
@supports (-webkit-touch-callout: none) {
  @media (max-width: 700px) {
    .chatbot-container {
      height: 88vh;
      height: 88svh;
    }
  }
}

.chatbot-container {
  background: #fff;
  display: flex;
  flex-direction: column;
  overflow: hidden;
}

.chat-header {
  background: linear-gradient(135deg, #6366f1, #3b82f6);
  color: white;
  padding: 12px;
  display: flex;
  justify-content: space-between;
  align-items: center;
}

.chat-header h2 {
  margin: 0;
  font-size: 16px;
}

.github-btn {
  display: flex;
  align-items: center;
  justify-content: center;
  margin-left: auto;
}

.github-btn img {
  width: 26px;
  height: 26px;
  cursor: pointer;
  filter: invert(1); /* white icon against the gradient header */
  transition: transform 0.2s ease;
}

.github-btn img:hover {
  transform: scale(1.2);
}

.chat-body {
  flex: 1 1 auto;
  padding: 10px;
  overflow-y: auto;
  background: #fafafa;
  display: flex;
  flex-direction: column;
  min-height: 0; /* allow the flex child to actually shrink and scroll */
}

.bot-msg {
  background: #e5edff;
  padding: 8px 12px;
  border-radius: 10px;
  margin: 4px 0;
  max-width: 90%;
  line-height: 1.4;
  display: inline-block;
  word-wrap: break-word;
  overflow-wrap: anywhere;
}

.user-msg {
  background: #d1fae5;
  padding: 8px 12px;
  border-radius: 10px;
  margin: 4px 0;
  max-width: 90%;
  align-self: flex-start;
  text-align: left;
  line-height: 1.4;
  display: inline-block;
  word-wrap: break-word;
  overflow-wrap: anywhere;
}

/* Animated "Thinking..." placeholder bubble. */
.thinking {
  font-style: italic;
  color: #666;
  background: #f3f4f6;
  animation: blink 1.2s infinite;
  align-self: flex-start;
  text-align: left;
  line-height: 1.3;
  display: inline-block;
}

@keyframes blink {
  0% { opacity: 0.4; }
  50% { opacity: 1; }
  100% { opacity: 0.4; }
}

.chat-footer {
  display: flex;
  padding: 10px;
  border-top: 1px solid #ddd;
  background: #f9f9f9;
}

#user-input {
  flex: 1;
  padding: 8px;
  border: 1px solid #ccc;
  border-radius: 8px;
}

#send-btn {
  margin-left: 6px;
  padding: 8px 12px;
  background: #6366f1;
  color: white;
  border: none;
  border-radius: 8px;
  cursor: pointer;
}

.quick-options {
  display: flex;
  flex-wrap: wrap;
  padding: 10px;
  background: #f4f4f9;
  border-top: 1px solid #ddd;
}

.option-btn {
  flex: 1 1 100%;
  background: #eef2ff;
  border: none;
  padding: 10px;
  margin: 6px 0;
  border-radius: 8px;
  cursor: pointer;
  transition: 0.2s;
  text-align: center;
  word-wrap: break-word;
  white-space: normal;
}

.option-btn:hover {
  background: #dbeafe;
}
templates/index.html
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Pranav Reddy Portfolio Chatbot</title>
  <link rel="stylesheet" href="/static/style.css?v=6">
</head>
<body>
  <div class="chatbot-container">
    <div class="chat-header">
      <h2>Ask Pranav</h2>

      <a href="https://github.com/ka1817" target="_blank" class="github-btn">
        <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/github/github-original.svg" alt="GitHub">
      </a>
    </div>

    <div class="chat-body" id="chat-body">
      <div class="bot-msg">👋 Hi! I’m Pranav Reddy’s assistant. What would you like to know?</div>
    </div>

    <div class="chat-footer">
      <input type="text" id="user-input" placeholder="Ask a question..." />
      <button id="send-btn">➤</button>
    </div>

    <!-- FIX: corrected typos in the canned queries ("Samrt Cart" -> "Smart
         Cart", "Netflex churn" -> "Netflix Churn"); these strings are sent
         verbatim to the RAG backend, so typos degraded retrieval quality. -->
    <div class="quick-options">
      <button class="option-btn" data-query="what are Pranav Reddy’s Projects-BigBasket Smart Cart and Netflix Churn Prediction">
        📂 Project Details
      </button>
      <button class="option-btn" data-query="How can I contact Pranav Reddy?">
        📞 Contact Details
      </button>
      <button class="option-btn" data-query="What are Pranav Reddy’s Skills?">
        💡 Skills
      </button>
      <button class="option-btn" data-query="Share Pranav Reddy’s Work Experience">
        💼 Experience
      </button>
    </div>
  </div>

  <script>
    const chatBody = document.getElementById("chat-body");
    const userInput = document.getElementById("user-input");
    const sendBtn = document.getElementById("send-btn");
    const optionButtons = document.querySelectorAll(".option-btn");

    // Append a chat bubble and keep the view scrolled to the newest message.
    function appendMessage(sender, text, extraClass = "") {
      const msg = document.createElement("div");
      msg.classList.add(sender === "user" ? "user-msg" : "bot-msg");
      if (extraClass) msg.classList.add(extraClass);
      msg.textContent = text; // textContent, not innerHTML: model output is untrusted
      chatBody.appendChild(msg);
      chatBody.scrollTop = chatBody.scrollHeight;
      return msg;
    }

    // Post the query to /predict and render the answer, with a transient
    // "thinking" placeholder while the request is in flight.
    async function sendMessage(query) {
      appendMessage("user", query);
      userInput.value = "";

      const thinkingMsg = appendMessage("bot", "🤔 Thinking...", "thinking");

      try {
        const res = await fetch("/predict", {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({ query }),
        });

        // FIX: the original parsed the body without checking the HTTP
        // status, so a 4xx/5xx rendered "undefined" instead of an error.
        if (!res.ok) {
          throw new Error(`HTTP ${res.status}`);
        }

        const data = await res.json();

        chatBody.removeChild(thinkingMsg);

        appendMessage("bot", data.response);
      } catch (error) {
        chatBody.removeChild(thinkingMsg);
        appendMessage("bot", "⚠️ Error fetching response.");
      }
    }

    sendBtn.addEventListener("click", () => {
      if (userInput.value.trim()) {
        sendMessage(userInput.value.trim());
      }
    });

    // FIX: "keypress" is deprecated; "keydown" fires identically for Enter
    // and is the supported event.
    userInput.addEventListener("keydown", (e) => {
      if (e.key === "Enter" && userInput.value.trim()) {
        sendMessage(userInput.value.trim());
      }
    });

    optionButtons.forEach((btn) => {
      btn.addEventListener("click", () => {
        const query = btn.getAttribute("data-query");
        sendMessage(query);
      });
    });
  </script>
</body>
</html>