Commit
·
7616a39
0
Parent(s):
Initial commit
Browse files- .gitignore +77 -0
- Dockerfile +33 -0
- README.md +223 -0
- app/__init__.py +10 -0
- app/config.py +53 -0
- app/main.py +265 -0
- app/models.py +112 -0
- app/rag/__init__.py +0 -0
- app/rag/db.py +16 -0
- app/rag/embeddings.py +71 -0
- app/rag/logging_config.py +13 -0
- app/rag/routes.py +140 -0
- app/rag/schemas.py +45 -0
- app/rag/utils.py +136 -0
- app/run_server.py +20 -0
- app/services.py +306 -0
- requirements.txt +12 -0
.gitignore
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# Virtual environment
|
| 7 |
+
venv/
|
| 8 |
+
env/
|
| 9 |
+
.myenv/
|
| 10 |
+
.myenv*/ # just in case named differently
|
| 11 |
+
.env
|
| 12 |
+
.env.*
|
| 13 |
+
|
| 14 |
+
# VS Code
|
| 15 |
+
.vscode/
|
| 16 |
+
|
| 17 |
+
# Pytest
|
| 18 |
+
.pytest_cache/
|
| 19 |
+
|
| 20 |
+
# Jupyter Notebook
|
| 21 |
+
.ipynb_checkpoints/
|
| 22 |
+
|
| 23 |
+
# FastAPI / Uvicorn logs
|
| 24 |
+
*.log
|
| 25 |
+
|
| 26 |
+
# Cache
|
| 27 |
+
.cache/
|
| 28 |
+
*.sqlite3
|
| 29 |
+
|
| 30 |
+
# Streamlit specific
|
| 31 |
+
.streamlit/config.toml
|
| 32 |
+
.streamlit/secrets.toml
|
| 33 |
+
|
| 34 |
+
# FAISS vector store
|
| 35 |
+
*.faiss*
|
| 36 |
+
*.pkl
|
| 37 |
+
*.index
|
| 38 |
+
|
| 39 |
+
# OS-specific
|
| 40 |
+
.DS_Store
|
| 41 |
+
Thumbs.db
|
| 42 |
+
|
| 43 |
+
# Docker artifacts
|
| 44 |
+
__pycache__/
|
| 45 |
+
*.tar
|
| 46 |
+
*.pid
|
| 47 |
+
*.sock
|
| 48 |
+
*.db
|
| 49 |
+
*.log
|
| 50 |
+
|
| 51 |
+
# Python egg metadata
|
| 52 |
+
*.egg-info/
|
| 53 |
+
*.egg
|
| 54 |
+
|
| 55 |
+
# Build artifacts
|
| 56 |
+
build/
|
| 57 |
+
dist/
|
| 58 |
+
*.egg-info/
|
| 59 |
+
|
| 60 |
+
# Coverage reports
|
| 61 |
+
htmlcov/
|
| 62 |
+
.coverage
|
| 63 |
+
.tox/
|
| 64 |
+
|
| 65 |
+
# Test artifacts
|
| 66 |
+
tests/__pycache__/
|
| 67 |
+
*.cover
|
| 68 |
+
|
| 69 |
+
# IDEs
|
| 70 |
+
.idea/
|
| 71 |
+
*.iml
|
| 72 |
+
|
| 73 |
+
# Node.js modules
|
| 74 |
+
node_modules/
|
| 75 |
+
|
| 76 |
+
# Custom virtual environments
|
| 77 |
+
myenv/myenv/
|
Dockerfile
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use the official Python 3.11 slim image as a base
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# Prevent Python from buffering stdout/stderr (so logs appear immediately)
|
| 5 |
+
ENV PYTHONUNBUFFERED=1
|
| 6 |
+
|
| 7 |
+
# Install system dependencies needed for certain Python packages (e.g., faiss, PyTorch CPU wheels)
|
| 8 |
+
RUN apt-get update && \
|
| 9 |
+
apt-get install -y --no-install-recommends \
|
| 10 |
+
build-essential \
|
| 11 |
+
git \
|
| 12 |
+
libgomp1 \
|
| 13 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 14 |
+
|
| 15 |
+
# Set the working directory inside the container
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
|
| 18 |
+
# Copy only requirements first to leverage Docker layer caching
|
| 19 |
+
COPY requirements.txt .
|
| 20 |
+
|
| 21 |
+
# Upgrade pip, install all Python dependencies, then install PyTorch CPU wheels
|
| 22 |
+
RUN pip install --upgrade pip && \
|
| 23 |
+
pip install -r requirements.txt && \
|
| 24 |
+
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
|
| 25 |
+
|
| 26 |
+
# Copy the rest of the application code
|
| 27 |
+
COPY . .
|
| 28 |
+
|
| 29 |
+
# Expose port 8000 for the FastAPI app
|
| 30 |
+
EXPOSE 8000
|
| 31 |
+
|
| 32 |
+
# By default, run uvicorn to serve the FastAPI app
|
| 33 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MAAS (Metrics & AI-Assisted Suggestions) combines PageSpeed-based performance analysis and Gemini reporting with a RAG-based chat system. This README covers setup, configuration, and the full API surface.
|
| 2 |
+
|
| 3 |
+
---
|
| 4 |
+
|
| 5 |
+
# MAAS API (Metrics & AI-Assisted Suggestions)
|
| 6 |
+
|
| 7 |
+
A professional FastAPI application that offers two core services:
|
| 8 |
+
|
| 9 |
+
1. **PageSpeed Performance Reports** – Using Google's PageSpeed Insights and Gemini AI for analysis and recommendations.
|
| 10 |
+
2. **RAG-Powered Chat System** – Retrieval-Augmented Generation (RAG) chat sessions with document ingestion, vectorstore indexing (FAISS), and persistent chat history (MongoDB).
|
| 11 |
+
|
| 12 |
+
## ✨ Features
|
| 13 |
+
|
| 14 |
+
* 🔍 PageSpeed Insights integration for web performance metrics
|
| 15 |
+
* 🤖 Gemini AI–powered optimization report generation
|
| 16 |
+
* 📚 Document ingestion and chunked embedding with FAISS
|
| 17 |
+
* 💬 RAG-based conversational system per user and chat session
|
| 18 |
+
* 📄 Clean modular FastAPI architecture
|
| 19 |
+
* 🛠️ Configuration via environment variables
|
| 20 |
+
* 🔐 Secure, with input validation and API key protection
|
| 21 |
+
* 📈 Built-in health check, detailed logging, and auto-generated API docs
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
## 🗂 Project Structure
|
| 26 |
+
|
| 27 |
+
```
|
| 28 |
+
MAAS/
|
| 29 |
+
├── app/
|
| 30 |
+
│ ├── rag/ # RAG module for document ingestion and chat
|
| 31 |
+
│ │ ├── db.py
|
| 32 |
+
│   │   ├── embeddings.py
|
| 33 |
+
│ │ ├── routes.py # RAG API endpoints
|
| 34 |
+
│ │ ├── schemas.py
|
| 35 |
+
│ │ └── utils.py
|
| 36 |
+
│ ├── config.py # Environment & settings
|
| 37 |
+
│ ├── main.py # FastAPI app instance & routers
|
| 38 |
+
│ ├── models.py # Pydantic models
|
| 39 |
+
│ ├── run_server.py # Server runner
|
| 40 |
+
│ └── services.py # PageSpeed + Gemini logic
|
| 41 |
+
├── Dockerfile # Optional containerization
|
| 42 |
+
├── requirements.txt # Dependencies
|
| 43 |
+
└── README.md # You're reading it
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
---
|
| 47 |
+
|
| 48 |
+
## 🚀 Getting Started
|
| 49 |
+
|
| 50 |
+
### 1. Install Dependencies
|
| 51 |
+
|
| 52 |
+
```bash
|
| 53 |
+
pip install -r requirements.txt
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
### 2. Create a `.env` file
|
| 57 |
+
|
| 58 |
+
```env
|
| 59 |
+
PAGESPEED_API_KEY=your_pagespeed_api_key_here
|
| 60 |
+
GEMINI_API_KEY=your_gemini_api_key_here
|
| 61 |
+
MONGO_URI=mongodb://localhost:27017
|
| 62 |
+
HOST=0.0.0.0
|
| 63 |
+
PORT=8000
|
| 64 |
+
DEBUG=True
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
### 3. Run the Application
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
# Option 1: Using the script
|
| 71 |
+
python app/run_server.py
|
| 72 |
+
|
| 73 |
+
# Option 2: Directly with uvicorn
|
| 74 |
+
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
---
|
| 78 |
+
|
| 79 |
+
## 📘 API Overview
|
| 80 |
+
|
| 81 |
+
### 🔗 General
|
| 82 |
+
|
| 83 |
+
| Method | Endpoint | Description |
|
| 84 |
+
| ------ | --------- | ------------------------------ |
|
| 85 |
+
| GET | `/` | Welcome + links to docs/health |
|
| 86 |
+
| GET | `/health` | Health check and uptime |
|
| 87 |
+
|
| 88 |
+
---
|
| 89 |
+
|
| 90 |
+
### 🧠 PageSpeed + Gemini Endpoints
|
| 91 |
+
|
| 92 |
+
| Method | Endpoint | Description |
|
| 93 |
+
| ------ | ---------------------- | --------------------------------- |
|
| 94 |
+
| POST | `/pagespeed` | Fetch raw PageSpeed Insights JSON |
|
| 95 |
+
| POST | `/generate-report` | Generate AI optimization report |
|
| 96 |
+
| POST | `/generate-priorities` | Rank optimizations by priority |
|
| 97 |
+
|
| 98 |
+
---
|
| 99 |
+
|
| 100 |
+
### 📚 RAG Chat System Endpoints
|
| 101 |
+
|
| 102 |
+
| Method | Endpoint | Description |
|
| 103 |
+
| ------ | ------------------------------- | ------------------------------------------ |
|
| 104 |
+
| POST | `/rag/ingest/{user_id}` | Ingest documents and store FAISS index |
|
| 105 |
+
| POST | `/rag/chat/create/{user_id}` | Start a new chat session (returns chat ID) |
|
| 106 |
+
| POST | `/rag/chat/{user_id}/{chat_id}` | Ask a question in an existing chat session |
|
| 107 |
+
|
| 108 |
+
---
|
| 109 |
+
|
| 110 |
+
## 📎 RAG Workflow
|
| 111 |
+
|
| 112 |
+
1. **Ingest Documents**
|
| 113 |
+
|
| 114 |
+
* POST `/rag/ingest/{user_id}`
|
| 115 |
+
* Body: `{"documents": ["doc 1 text", "doc 2 text", ...]}`
|
| 116 |
+
|
| 117 |
+
2. **Create Chat**
|
| 118 |
+
|
| 119 |
+
* POST `/rag/chat/create/{user_id}`
|
| 120 |
+
* Response: `chat_id`
|
| 121 |
+
|
| 122 |
+
3. **Ask Questions**
|
| 123 |
+
|
| 124 |
+
* POST `/rag/chat/{user_id}/{chat_id}`
|
| 125 |
+
* Body: `{"question": "What does the document say about X?"}`
|
| 126 |
+
|
| 127 |
+
---
|
| 128 |
+
|
| 129 |
+
## 🛠 Example Usage (Python)
|
| 130 |
+
|
| 131 |
+
```python
|
| 132 |
+
import requests
|
| 133 |
+
|
| 134 |
+
# Ingest docs
|
| 135 |
+
requests.post("http://localhost:8000/rag/ingest/user123", json={
|
| 136 |
+
"documents": ["The capital of France is Paris.", "Python is a programming language."]
|
| 137 |
+
})
|
| 138 |
+
|
| 139 |
+
# Create chat
|
| 140 |
+
res = requests.post("http://localhost:8000/rag/chat/create/user123")
|
| 141 |
+
chat_id = res.json()["chat_id"]
|
| 142 |
+
|
| 143 |
+
# Chat
|
| 144 |
+
requests.post(f"http://localhost:8000/rag/chat/user123/{chat_id}", json={
|
| 145 |
+
"question": "What is the capital of France?"
|
| 146 |
+
})
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
---
|
| 150 |
+
|
| 151 |
+
## 📄 API Docs
|
| 152 |
+
|
| 153 |
+
Once the app is running:
|
| 154 |
+
|
| 155 |
+
* Swagger UI: [http://localhost:8000/docs](http://localhost:8000/docs)
|
| 156 |
+
* ReDoc: [http://localhost:8000/redoc](http://localhost:8000/redoc)
|
| 157 |
+
|
| 158 |
+
---
|
| 159 |
+
|
| 160 |
+
## 🛡️ Error Handling
|
| 161 |
+
|
| 162 |
+
* `400 Bad Request`: Invalid input
|
| 163 |
+
* `404 Not Found`: Unknown endpoint or missing user/chat/doc
|
| 164 |
+
* `500 Internal Server Error`: API or service errors
|
| 165 |
+
|
| 166 |
+
---
|
| 167 |
+
|
| 168 |
+
## 🧪 Development Tips
|
| 169 |
+
|
| 170 |
+
* Use `DEBUG=True` in `.env` for auto-reload and verbose logs
|
| 171 |
+
* Modify `CORS` policy in `main.py` before production
|
| 172 |
+
* Use `logger` calls to trace errors or logic flows
|
| 173 |
+
|
| 174 |
+
---
|
| 175 |
+
|
| 176 |
+
## 🌍 API Key Setup
|
| 177 |
+
|
| 178 |
+
### PageSpeed Insights
|
| 179 |
+
|
| 180 |
+
1. [Google Cloud Console](https://console.cloud.google.com/)
|
| 181 |
+
2. Enable the API, generate a key
|
| 182 |
+
|
| 183 |
+
### Gemini AI
|
| 184 |
+
|
| 185 |
+
1. [Google AI Studio](https://makersuite.google.com/)
|
| 186 |
+
2. Create API Key
|
| 187 |
+
|
| 188 |
+
Add both to your `.env`.
|
| 189 |
+
|
| 190 |
+
---
|
| 191 |
+
|
| 192 |
+
## 📦 Docker Support
|
| 193 |
+
|
| 194 |
+
Basic Dockerfile is included. To build and run:
|
| 195 |
+
|
| 196 |
+
```bash
|
| 197 |
+
docker build -t maas-api .
|
| 198 |
+
docker run -p 8000:8000 --env-file .env maas-api
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
---
|
| 202 |
+
|
| 203 |
+
## 🤝 Contributing
|
| 204 |
+
|
| 205 |
+
1. Follow existing modular structure
|
| 206 |
+
2. Document all new endpoints clearly
|
| 207 |
+
3. Test edge cases (e.g., malformed docs or bad chat IDs)
|
| 208 |
+
4. Use logging for traceability
|
| 209 |
+
5. Create clear, typed Pydantic schemas
|
| 210 |
+
|
| 211 |
+
---
|
| 212 |
+
|
| 213 |
+
## 📜 License
|
| 214 |
+
|
| 215 |
+
Licensed under the MIT License.
|
| 216 |
+
|
| 217 |
+
---
|
| 218 |
+
|
| 219 |
+
## 🔗 Repository
|
| 220 |
+
|
| 221 |
+
[https://github.com/Hammadwakeel/MAAS](https://github.com/Hammadwakeel/MAAS)
|
| 222 |
+
|
| 223 |
+
---
|
app/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PageSpeed Insights Report Generator API
|
| 3 |
+
|
| 4 |
+
A professional FastAPI application for generating detailed PageSpeed Insights
|
| 5 |
+
reports using Google's APIs and Gemini AI for advanced analysis.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
__version__ = "1.0.0"
|
| 9 |
+
__author__ = "Hammad Wakeel"
|
| 10 |
+
__email__ = "hammadshah71200@gmail.com"
|
app/config.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 4 |
+
|
| 5 |
+
# Load environment variables from .env
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    NOTE(review): every field default is resolved eagerly via os.getenv() at
    class-definition time (after load_dotenv() has populated os.environ), so
    pydantic-settings' own env/env_file resolution below is effectively
    bypassed for these fields — confirm this precedence is intended.
    """

    # ───────────────────────────────────────────────────────────────────────────
    # Google API Keys
    # ───────────────────────────────────────────────────────────────────────────
    pagespeed_api_key: str = os.getenv("PAGESPEED_API_KEY", "")
    gemini_api_key: str = os.getenv("GEMINI_API_KEY", "")

    # ───────────────────────────────────────────────────────────────────────────
    # Chat & RAG Configuration
    # ───────────────────────────────────────────────────────────────────────────
    groq_api_key: str = os.getenv("GROQ_API_KEY", "")
    # Directory under which per-user FAISS vectorstores are persisted.
    vectorstore_base_path: str = os.getenv("VECTORSTORE_BASE_PATH", "./vectorstores")

    # ───────────────────────────────────────────────────────────────────────────
    # MongoDB Configuration (Local)
    # ───────────────────────────────────────────────────────────────────────────
    mongo_uri: str = os.getenv("MONGO_URI", "mongodb://localhost:27017")
    mongo_chat_db: str = os.getenv("MONGO_CHAT_DB", "Education_chatbot")
    mongo_chat_collection: str = os.getenv("MONGO_CHAT_COLLECTION", "chat_histories")

    # ───────────────────────────────────────────────────────────────────────────
    # FastAPI Server Configuration
    # ───────────────────────────────────────────────────────────────────────────
    host: str = os.getenv("HOST", "0.0.0.0")
    # int() here raises ValueError at import time if PORT is not numeric.
    port: int = int(os.getenv("PORT", "8000"))
    # DEBUG accepts any case of "true"; everything else is False.
    debug: bool = os.getenv("DEBUG", "False").lower() == "true"

    # ───────────────────────────────────────────────────────────────────────────
    # App Metadata (unchanged)
    # ───────────────────────────────────────────────────────────────────────────
    app_name: str = "PageSpeed Insights Report Generator"
    app_version: str = "1.0.0"
    app_description: str = (
        "Professional API for generating PageSpeed Insights reports "
        "using Google's APIs and Gemini AI"
    )

    # pydantic-settings v2 configuration: read .env with UTF-8 encoding.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8"
    )

# Instantiate settings once; imported as a module-level singleton elsewhere.
settings = Settings()
|
app/main.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Main FastAPI application module.
|
| 3 |
+
"""
|
| 4 |
+
import time
|
| 5 |
+
import logging
|
| 6 |
+
import json
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from fastapi import FastAPI, HTTPException, Depends
|
| 9 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
+
from fastapi.responses import JSONResponse
|
| 11 |
+
from contextlib import asynccontextmanager
|
| 12 |
+
|
| 13 |
+
from app.config import settings
|
| 14 |
+
from app.models import (
|
| 15 |
+
PageSpeedRequest,
|
| 16 |
+
PageSpeedDataResponse,
|
| 17 |
+
ReportRequest,
|
| 18 |
+
ReportResponse,
|
| 19 |
+
HealthResponse,
|
| 20 |
+
PriorityRequest,
|
| 21 |
+
PriorityResponse
|
| 22 |
+
)
|
| 23 |
+
from app.services import PageSpeedService
|
| 24 |
+
from app.rag.routes import router as rag_router
|
| 25 |
+
|
| 26 |
+
# ------------------------
# Configure root logger
# ------------------------
logger = logging.getLogger("app")
logger.setLevel(logging.INFO)

# Guard against duplicate handlers when this module is imported more than
# once (e.g. under `uvicorn --reload` or when tests re-import the app),
# which would otherwise emit every log line multiple times.
if not logger.handlers:
    handler = logging.StreamHandler()
    formatter = logging.Formatter(
        "%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )
    handler.setFormatter(formatter)
    logger.addHandler(handler)

# Global variable to track startup time; set in lifespan(), read by /health.
startup_time = None
|
| 42 |
+
|
| 43 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager.

    Records the process start time (used by /health to report uptime) and
    logs startup/shutdown events bracketing the application's lifetime.
    """
    global startup_time
    startup_time = time.time()
    logger.info("🚀 Starting %s v%s", settings.app_name, settings.app_version)
    logger.info("📊 Server running on %s:%s", settings.host, settings.port)
    yield  # the application serves requests while suspended here
    logger.info("📊 Shutting down %s", settings.app_name)
|
| 52 |
+
|
| 53 |
+
# Create FastAPI app instance; metadata comes from Settings so docs stay in
# sync with configuration.
app = FastAPI(
    title=settings.app_name,
    description=settings.app_description,
    version=settings.app_version,
    lifespan=lifespan,
    docs_url="/docs",
    redoc_url="/redoc"
)

# Mount RAG router (all /rag/* endpoints live in app/rag/routes.py)
app.include_router(rag_router)

# Add CORS middleware. Wildcard origins are development-only; tighten before
# production (see README "Development Tips").
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify exact origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 74 |
+
|
| 75 |
+
# Dependency to get PageSpeed service
def get_pagespeed_service() -> PageSpeedService:
    """Construct a fresh PageSpeedService for the current request (FastAPI dependency)."""
    service = PageSpeedService()
    return service
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
@app.get("/", response_model=dict)
|
| 82 |
+
async def root():
|
| 83 |
+
"""Root endpoint with API information."""
|
| 84 |
+
return {
|
| 85 |
+
"message": f"Welcome to {settings.app_name}",
|
| 86 |
+
"version": settings.app_version,
|
| 87 |
+
"description": settings.app_description,
|
| 88 |
+
"docs": "/docs",
|
| 89 |
+
"health": "/health"
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
@app.get("/health", response_model=HealthResponse)
|
| 94 |
+
async def health_check():
|
| 95 |
+
"""Health check endpoint."""
|
| 96 |
+
global startup_time
|
| 97 |
+
|
| 98 |
+
if startup_time:
|
| 99 |
+
uptime_seconds = time.time() - startup_time
|
| 100 |
+
uptime_str = f"{uptime_seconds:.2f} seconds"
|
| 101 |
+
else:
|
| 102 |
+
uptime_str = "Unknown"
|
| 103 |
+
|
| 104 |
+
return HealthResponse(
|
| 105 |
+
status="healthy",
|
| 106 |
+
version=settings.app_version,
|
| 107 |
+
uptime=uptime_str
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
@app.post("/pagespeed", response_model=PageSpeedDataResponse)
|
| 112 |
+
async def fetch_pagespeed(
|
| 113 |
+
request: PageSpeedRequest,
|
| 114 |
+
service: PageSpeedService = Depends(get_pagespeed_service)
|
| 115 |
+
):
|
| 116 |
+
"""
|
| 117 |
+
Fetch raw PageSpeed Insights data for a given URL.
|
| 118 |
+
|
| 119 |
+
Request body:
|
| 120 |
+
{
|
| 121 |
+
"url": "https://www.example.com"
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
Returns:
|
| 125 |
+
{
|
| 126 |
+
"success": true,
|
| 127 |
+
"url": "https://www.example.com",
|
| 128 |
+
"pagespeed_data": { ... },
|
| 129 |
+
"error": null
|
| 130 |
+
}
|
| 131 |
+
"""
|
| 132 |
+
url_str = str(request.url)
|
| 133 |
+
logger.info("Received POST /pagespeed for URL: %s", url_str)
|
| 134 |
+
|
| 135 |
+
try:
|
| 136 |
+
pagespeed_data = service.get_pagespeed_data(url_str)
|
| 137 |
+
logger.info("Returning PageSpeed data for %s", url_str)
|
| 138 |
+
return PageSpeedDataResponse(
|
| 139 |
+
success=True,
|
| 140 |
+
url=url_str,
|
| 141 |
+
pagespeed_data=pagespeed_data,
|
| 142 |
+
error=None
|
| 143 |
+
)
|
| 144 |
+
except Exception as e:
|
| 145 |
+
logger.error("Error in /pagespeed endpoint for URL %s: %s", url_str, e, exc_info=True)
|
| 146 |
+
return PageSpeedDataResponse(
|
| 147 |
+
success=False,
|
| 148 |
+
url=url_str,
|
| 149 |
+
pagespeed_data=None,
|
| 150 |
+
error=str(e)
|
| 151 |
+
)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
@app.post("/generate-report", response_model=ReportResponse)
|
| 155 |
+
async def generate_report(
|
| 156 |
+
body: ReportRequest,
|
| 157 |
+
service: PageSpeedService = Depends(get_pagespeed_service)
|
| 158 |
+
):
|
| 159 |
+
"""
|
| 160 |
+
Generate a Gemini-based optimization report from previously-fetched PageSpeed JSON.
|
| 161 |
+
|
| 162 |
+
Request body:
|
| 163 |
+
{
|
| 164 |
+
"pagespeed_data": { …full PageSpeed JSON… }
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
Returns:
|
| 168 |
+
{
|
| 169 |
+
"success": true,
|
| 170 |
+
"report": "Gemini-generated analysis…",
|
| 171 |
+
"error": null
|
| 172 |
+
}
|
| 173 |
+
"""
|
| 174 |
+
logger.info("Received POST /generate-report")
|
| 175 |
+
|
| 176 |
+
try:
|
| 177 |
+
pagespeed_data = body.pagespeed_data
|
| 178 |
+
logger.debug("PageSpeed JSON payload size: %d bytes", len(str(pagespeed_data)))
|
| 179 |
+
|
| 180 |
+
report_text = service.generate_report_with_gemini(pagespeed_data)
|
| 181 |
+
logger.info("Returning Gemini report.")
|
| 182 |
+
return ReportResponse(
|
| 183 |
+
success=True,
|
| 184 |
+
report=report_text,
|
| 185 |
+
error=None
|
| 186 |
+
)
|
| 187 |
+
except Exception as e:
|
| 188 |
+
logger.error("Error in /generate-report endpoint: %s", e, exc_info=True)
|
| 189 |
+
return ReportResponse(
|
| 190 |
+
success=False,
|
| 191 |
+
report=None,
|
| 192 |
+
error=str(e)
|
| 193 |
+
)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
@app.post("/generate-priorities", response_model=PriorityResponse)
|
| 197 |
+
async def generate_priorities(
|
| 198 |
+
request: PriorityRequest,
|
| 199 |
+
service: PageSpeedService = Depends(get_pagespeed_service)
|
| 200 |
+
):
|
| 201 |
+
"""
|
| 202 |
+
Generate a prioritized list of performance improvements from a Gemini report.
|
| 203 |
+
|
| 204 |
+
Request body:
|
| 205 |
+
{
|
| 206 |
+
"report": "Full Gemini-generated performance report..."
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
Returns:
|
| 210 |
+
{
|
| 211 |
+
"success": true,
|
| 212 |
+
"priorities": {
|
| 213 |
+
"High": ["Optimize TBT by reducing JS execution", ...],
|
| 214 |
+
"Medium": [...],
|
| 215 |
+
"Low": [...]
|
| 216 |
+
},
|
| 217 |
+
"error": null
|
| 218 |
+
}
|
| 219 |
+
"""
|
| 220 |
+
logger.info("Received POST /generate-priorities")
|
| 221 |
+
try:
|
| 222 |
+
priorities = service.generate_priority(request.report)
|
| 223 |
+
return PriorityResponse(success=True, priorities=priorities)
|
| 224 |
+
except Exception as e:
|
| 225 |
+
logger.error("Error in /generate-priorities: %s", e, exc_info=True)
|
| 226 |
+
return PriorityResponse(success=False, priorities=None, error=str(e))
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
@app.exception_handler(404)
async def not_found_handler(request, exc):
    """Custom 404 handler: logs the miss and points the caller at /docs."""
    logger.warning("404 Not Found: %s %s", request.method, request.url.path)
    payload = {
        "error": "Not Found",
        "message": "The requested endpoint was not found",
        "docs": "/docs",
    }
    return JSONResponse(status_code=404, content=payload)
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
@app.exception_handler(500)
async def internal_error_handler(request, exc):
    """Custom 500 handler: logs the traceback and returns a generic envelope."""
    logger.error("500 Internal Server Error: %s %s -> %s", request.method, request.url.path, exc, exc_info=True)
    payload = {
        "error": "Internal Server Error",
        "message": "An unexpected error occurred",
        "timestamp": datetime.now().isoformat(),
    }
    return JSONResponse(status_code=500, content=payload)
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
if __name__ == "__main__":
|
| 258 |
+
import uvicorn
|
| 259 |
+
# When running directly, uvicorn will print its own logs. We just start it here.
|
| 260 |
+
uvicorn.run(
|
| 261 |
+
"app.main:app",
|
| 262 |
+
host=settings.host,
|
| 263 |
+
port=settings.port,
|
| 264 |
+
reload=settings.debug
|
| 265 |
+
)
|
app/models.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/models.py
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Pydantic models for request/response validation.
|
| 5 |
+
"""
|
| 6 |
+
from pydantic import BaseModel, HttpUrl, Field
|
| 7 |
+
from typing import Optional, Dict, Any, List
|
| 8 |
+
|
| 9 |
+
class PageSpeedRequest(BaseModel):
    """Request model for fetching PageSpeed data."""
    # NOTE(review): `example=` on Field is the deprecated pydantic-v2 spelling
    # (superseded by json_schema_extra / examples) — confirm pydantic version.
    url: HttpUrl = Field(
        ...,
        description="The URL to analyze for PageSpeed insights",
        example="https://www.example.com"
    )

    class Config:
        # Example payload shown in the OpenAPI docs.
        json_schema_extra = {
            "example": {
                "url": "https://www.ocoya.com/"
            }
        }
|
| 23 |
+
|
| 24 |
+
class PageSpeedDataResponse(BaseModel):
    """Response model that returns only the raw PageSpeed data.

    Exactly one of `pagespeed_data` / `error` is populated depending on
    `success` (see the /pagespeed endpoint).
    """
    success: bool = Field(
        ...,
        description="Whether the PageSpeed fetch was successful"
    )
    url: str = Field(
        ...,
        description="The analyzed URL"
    )
    pagespeed_data: Optional[Dict[Any, Any]] = Field(
        None,
        description="Raw PageSpeed Insights data"
    )
    error: Optional[str] = Field(
        None,
        description="Error message if fetching failed"
    )
|
| 42 |
+
|
| 43 |
+
class ReportRequest(BaseModel):
    """
    Request model for generating a Gemini report.
    Expects the entire raw PageSpeed JSON payload in the body.
    """
    pagespeed_data: Dict[Any, Any] = Field(
        ...,
        description="Raw PageSpeed Insights data (JSON) previously fetched",
    )

    class Config:
        # pydantic v2 key (was `schema_extra` in v1, which v2 silently
        # ignores); matches PageSpeedRequest above.
        json_schema_extra = {
            "example": {
                "pagespeed_data": {
                    # (Truncated example; in practice this would be
                    # the full runPagespeed v5 JSON structure)
                    "lighthouseResult": {
                        "audits": {
                            "first-contentful-paint": {"numericValue": 1234},
                            "largest-contentful-paint": {"numericValue": 2345}
                        }
                    },
                    "loadingExperience": {
                        "metrics": {
                            "FIRST_CONTENTFUL_PAINT_MS": {"percentile": 1200, "category": "FAST"}
                        }
                    }
                    # …etc.
                }
            }
        }
|
| 74 |
+
|
| 75 |
+
class ReportResponse(BaseModel):
    """Response model that returns only the Gemini-generated report.

    `report` is set on success; `error` on failure (see /generate-report).
    """
    success: bool = Field(
        ...,
        description="Whether report generation was successful"
    )
    report: Optional[str] = Field(
        None,
        description="Gemini-generated performance optimization report"
    )
    error: Optional[str] = Field(
        None,
        description="Error message if report generation failed"
    )
|
| 89 |
+
|
| 90 |
+
class HealthResponse(BaseModel):
    """Health check response model (see the /health endpoint)."""
    status: str = Field(
        ...,
        description="Health status of the API"
    )
    version: str = Field(
        ...,
        description="API version"
    )
    uptime: str = Field(
        ...,
        description="API uptime"
    )
|
| 104 |
+
|
| 105 |
+
class PriorityRequest(BaseModel):
    """Request model for /generate-priorities: full Gemini report text."""
    report: str
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
class PriorityResponse(BaseModel):
    """Response mapping priority buckets to lists of recommendation strings."""
    success: bool
    priorities: Optional[Dict[str, List[str]]] = None
    error: Optional[str] = None
|
app/rag/__init__.py
ADDED
|
File without changes
|
app/rag/db.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pymongo import MongoClient
from app.config import settings

# ──────────────────────────────────────────────────────────────────────────────
# MongoDB Initialization
# ──────────────────────────────────────────────────────────────────────────────

# Connect to MongoDB using the URI from app/config.py.
mongo_client = MongoClient(settings.mongo_uri)
# Database used for both chat histories and vectorstore metadata.
mongo_db = mongo_client[settings.mongo_chat_db]

# Collection to store metadata that maps user_id → vectorstore_path
# (written by app/rag/utils.upsert_vectorstore_metadata).
vectorstore_meta_coll = mongo_db["vectorstore_metadata"]

# Name of the collection that MongoDBChatMessageHistory will write to.
chat_collection_name = settings.mongo_chat_collection
|
app/rag/embeddings.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
| 3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 4 |
+
from langchain.prompts import ChatPromptTemplate
|
| 5 |
+
|
| 6 |
+
def get_llm():
    """
    Return a ChatGroq LLM instance (meta-llama/llama-4-scout-17b-16e-instruct)
    configured from the GROQ_API_KEY environment variable.

    NOTE(review): if GROQ_API_KEY is unset an empty key is passed, so the
    failure only surfaces on the first LLM call, not here.
    """
    # Imported lazily so the module can load without langchain_groq installed.
    from langchain_groq import ChatGroq

    llm = ChatGroq(
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        temperature=0,
        max_tokens=1024,
        api_key=os.getenv("GROQ_API_KEY", "")  # Put your actual GROQ key in .env as GROQ_API_KEY
    )
    return llm
|
| 20 |
+
|
| 21 |
+
# ──────────────────────────────────────────────────────────────────────────────
# 1. Text Splitter (512 characters per chunk, 100 character overlap)
#    NOTE: RecursiveCharacterTextSplitter counts characters, not tokens.
# ──────────────────────────────────────────────────────────────────────────────
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=100)

# ──────────────────────────────────────────────────────────────────────────────
# 2. Embeddings Model (HuggingFace BGE) on CPU
# ──────────────────────────────────────────────────────────────────────────────

HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

from huggingface_hub import login

# BUGFIX: only authenticate when a token is actually configured.
# Calling login(None) raises (or prompts interactively) at import time,
# which would crash the whole app on startup when the env var is missing.
# The BGE model is public, so anonymous access still works without a token.
if HF_TOKEN:
    login(HF_TOKEN)

model_name = "BAAI/bge-small-en-v1.5"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)
# ──────────────────────────────────────────────────────────────────────────────
# 3. Prompt Template for RAG Assistant
# ──────────────────────────────────────────────────────────────────────────────
prompt_template = """
You are an assistant specialized in analyzing and improving website performance. Your goal is to provide accurate, practical, and performance-driven answers.
Use the following retrieved context (such as PageSpeed Insights data or audit results) to answer the user's question.
If the context lacks sufficient information, respond with "I don't know." Do not make up answers or provide unverified information.

Guidelines:
1. Extract relevant performance insights from the context to form a helpful and actionable response.
2. Maintain a clear, professional, and user-focused tone.
3. If the question is unclear or needs more detail, ask for clarification politely.
4. Prioritize recommendations that follow web performance best practices (e.g., optimizing load times, reducing blocking resources, improving visual stability).

Retrieved context:
{context}

User's question:
{question}

Your response:
"""

user_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", prompt_template),
        ("human", "{question}"),
    ]
)
|
app/rag/logging_config.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging

# Configure a module-level logger shared by all RAG components.
logger = logging.getLogger("app.rag")
logger.setLevel(logging.INFO)

# BUGFIX: guard against attaching a second handler when this module is
# executed again (e.g. under uvicorn --reload), which would duplicate
# every log line.
if not logger.handlers:
    handler = logging.StreamHandler()
    formatter = logging.Formatter(
        "%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )
    handler.setFormatter(formatter)
    logger.addHandler(handler)
|
app/rag/routes.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import uuid
|
| 3 |
+
from fastapi import APIRouter, HTTPException
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
from .schemas import (
|
| 7 |
+
IngestRequest,
|
| 8 |
+
IngestResponse,
|
| 9 |
+
CreateChatResponse,
|
| 10 |
+
ChatRequest,
|
| 11 |
+
ChatResponse
|
| 12 |
+
)
|
| 13 |
+
from .utils import (
|
| 14 |
+
text_splitter,
|
| 15 |
+
embeddings,
|
| 16 |
+
get_vectorstore_path,
|
| 17 |
+
save_vectorstore_to_disk,
|
| 18 |
+
upsert_vectorstore_metadata,
|
| 19 |
+
build_or_load_vectorstore,
|
| 20 |
+
build_rag_chain,
|
| 21 |
+
initialize_chat_history
|
| 22 |
+
)
|
| 23 |
+
from .logging_config import logger
|
| 24 |
+
|
| 25 |
+
router = APIRouter(prefix="/rag", tags=["rag"])
|
| 26 |
+
|
| 27 |
+
@router.post("/ingest/{user_id}", response_model=IngestResponse)
async def ingest_documents(user_id: str, body: IngestRequest):
    """
    Ingest a list of text documents into a FAISS vectorstore for this user.

    Steps:
      1. Concatenate all documents into one string.
      2. Split into chunks using RecursiveCharacterTextSplitter.
      3. Create a FAISS vectorstore from those chunks.
      4. Save the vectorstore to disk under ./vectorstores/{user_id}/faiss_index.
      5. Upsert metadata in Mongo (user_id -> vectorstore_path).

    Raises:
        HTTPException: 500 if any step of the ingestion pipeline fails.
    """
    logger.info("Ingestion requested for user_id=%s. Number of docs=%d", user_id, len(body.documents))
    try:
        # 1. Join all provided documents
        all_text = "\n\n".join(body.documents)

        # 2. Split into chunks
        text_chunks = text_splitter.split_text(all_text)
        logger.info("Split into %d chunks", len(text_chunks))

        # 3. Build FAISS vectorstore.
        #    CONSISTENCY FIX: import FAISS from langchain_community — the
        #    maintained location and the same one app/rag/utils.py uses —
        #    instead of the deprecated langchain.vectorstores path.
        from langchain_community.vectorstores import FAISS as _FAISS
        vs = _FAISS.from_texts(texts=text_chunks, embedding=embeddings)

        # 4. Save to disk
        faiss_path = save_vectorstore_to_disk(vs, user_id)
        logger.info("Saved FAISS index to %s", faiss_path)

        # 5. Upsert metadata so later requests can locate the index
        upsert_vectorstore_metadata(user_id, faiss_path)
        logger.info("Upserted vectorstore metadata for user_id=%s", user_id)

        return IngestResponse(
            success=True,
            message="Vectorstore created successfully.",
            user_id=user_id,
            vectorstore_path=faiss_path
        )
    except Exception as e:
        logger.error("Error during ingestion for user_id=%s: %s", user_id, e, exc_info=True)
        raise HTTPException(status_code=500, detail=f"Ingestion failed: {e}")
|
| 68 |
+
|
| 69 |
+
@router.post("/chat/create/{user_id}", response_model=CreateChatResponse)
async def create_chat_session(user_id: str):
    """
    Open a fresh chat session for the given user.

    A random UUID is minted as the chat_id, an empty MongoDBChatMessageHistory
    is initialized for it (persisting the session to Mongo), and the chat_id
    is returned for use in subsequent chat calls.
    """
    logger.info("Creating new chat session for user_id=%s", user_id)
    try:
        new_chat_id = str(uuid.uuid4())

        # Writing an empty history document registers the session in Mongo.
        initialize_chat_history(new_chat_id)
        logger.info("Created chat history in Mongo for chat_id=%s", new_chat_id)

        return CreateChatResponse(
            success=True,
            message="Chat session created.",
            user_id=user_id,
            chat_id=new_chat_id,
        )
    except Exception as e:
        logger.error("Error creating chat for user_id=%s: %s", user_id, e, exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to create chat session: {e}")
|
| 94 |
+
|
| 95 |
+
@router.post("/chat/{user_id}/{chat_id}", response_model=ChatResponse)
async def chat_with_user(user_id: str, chat_id: str, body: ChatRequest):
    """
    Answer a user question via the RAG chain.

    - Loads the FAISS index for user_id (404 if not found).
    - Retrieves (or initializes) the MongoDBChatMessageHistory for chat_id.
    - Runs the ConversationalRetrievalChain to get an answer.
    - Chat history is persisted back to Mongo automatically by the chain.
    """
    question = body.question
    logger.info("Received chat request: user_id=%s, chat_id=%s, question='%s'", user_id, chat_id, question)

    try:
        # Assemble the RAG chain; raises a 404 HTTPException when the user
        # has no vectorstore yet.
        rag_chain = build_rag_chain(user_id, chat_id)

        # Invoke the chain. Depending on the chain type, the reply may be
        # keyed "answer" or "output_text".
        chain_output = rag_chain.invoke({"question": question})
        answer = chain_output.get("answer") or chain_output.get("output_text") or None

        if answer is None:
            logger.error("Chain returned no 'answer' or 'output_text': %s", chain_output)
            raise Exception("Failed to retrieve answer from chain.")

        logger.info("Chain answered for chat_id=%s: %s", chat_id, answer)

        return ChatResponse(
            success=True,
            answer=answer,
            error=None,
            chat_id=chat_id,
            user_id=user_id,
        )
    except HTTPException:
        # Propagate known HTTP errors (e.g. the 404 from build_rag_chain).
        raise
    except Exception as e:
        logger.error("Error in chat endpoint for user_id=%s, chat_id=%s: %s", user_id, chat_id, e, exc_info=True)
        return ChatResponse(
            success=False,
            answer=None,
            error=str(e),
            chat_id=chat_id,
            user_id=user_id,
        )
|
app/rag/schemas.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
|
| 4 |
+
class IngestRequest(BaseModel):
    """Request body for ingesting documents into a user's FAISS vector store."""
    documents: List[str] = Field(
        ...,
        description="A list of text documents (strings) to ingest into the vector store."
    )
|
| 12 |
+
|
| 13 |
+
class IngestResponse(BaseModel):
    """Response returned after ingesting documents for a user."""
    success: bool
    message: str
    user_id: str
    # On-disk location of the saved FAISS index (set on success).
    vectorstore_path: Optional[str] = None
|
| 21 |
+
|
| 22 |
+
class CreateChatResponse(BaseModel):
    """Response returned after creating a new chat session for a user."""
    success: bool
    message: str
    user_id: str
    # UUID of the freshly created chat session (set on success).
    chat_id: Optional[str] = None
|
| 30 |
+
|
| 31 |
+
class ChatRequest(BaseModel):
    """Body for sending a user message to an existing chat session."""
    question: str = Field(..., description="The user's question or message.")
|
| 36 |
+
|
| 37 |
+
class ChatResponse(BaseModel):
    """Response from the RAG chatbot endpoint."""
    success: bool
    # Exactly one of `answer` / `error` is expected to be populated.
    answer: Optional[str] = None
    error: Optional[str] = None
    chat_id: str
    user_id: str
|
app/rag/utils.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Optional, Dict, Any
|
| 3 |
+
from fastapi import HTTPException
|
| 4 |
+
|
| 5 |
+
from langchain_community.vectorstores import FAISS
|
| 6 |
+
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
|
| 7 |
+
from langchain.memory import ConversationBufferMemory # ← IMPORT THIS
|
| 8 |
+
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
+
|
| 10 |
+
from app.config import settings
|
| 11 |
+
from .db import vectorstore_meta_coll, chat_collection_name
|
| 12 |
+
from .embeddings import embeddings, text_splitter, user_prompt, get_llm
|
| 13 |
+
from .logging_config import logger
|
| 14 |
+
|
| 15 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 16 |
+
# 1. Helper: Path to Store (or Load) a User's FAISS Vectorstore on Disk
|
| 17 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 18 |
+
def get_vectorstore_path(user_id: str) -> str:
    """
    Return (creating it if necessary) the on-disk directory for this user's
    vectorstore, e.g. './vectorstores/{user_id}'.
    """
    user_dir = os.path.join(settings.vectorstore_base_path, user_id)
    os.makedirs(user_dir, exist_ok=True)
    return user_dir
|
| 27 |
+
|
| 28 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 29 |
+
# 2. Build or Load an Existing FAISS Index for a User
|
| 30 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 31 |
+
def build_or_load_vectorstore(user_id: str) -> FAISS:
    """
    Load the persisted FAISS index for this user from disk.

    Raises:
        FileNotFoundError: if no index directory exists for the user.
    """
    index_dir = os.path.join(get_vectorstore_path(user_id), "faiss_index")

    if not os.path.isdir(index_dir):
        raise FileNotFoundError(f"No vectorstore found at {index_dir}")

    # The index was pickled by FAISS.save_local; we trust our own files,
    # hence the explicit opt-in to deserialization.
    return FAISS.load_local(
        folder_path=index_dir,
        embeddings=embeddings,
        allow_dangerous_deserialization=True
    )
|
| 48 |
+
|
| 49 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 50 |
+
# 3. Save a FAISS Vectorstore to Disk for a User
|
| 51 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 52 |
+
def save_vectorstore_to_disk(vectorstore: FAISS, user_id: str) -> str:
    """
    Persist the FAISS vectorstore under './vectorstores/{user_id}/faiss_index'
    and return that folder's path.
    """
    index_dir = os.path.join(get_vectorstore_path(user_id), "faiss_index")
    os.makedirs(index_dir, exist_ok=True)
    vectorstore.save_local(folder_path=index_dir)
    return index_dir
|
| 62 |
+
|
| 63 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 64 |
+
# 4. Upsert or Fetch Vectorstore Metadata in MongoDB
|
| 65 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 66 |
+
def upsert_vectorstore_metadata(user_id: str, vectorstore_path: str) -> None:
    """
    Record (insert or update) the user_id → vectorstore_path mapping in Mongo.
    """
    query = {"user_id": user_id}
    update = {"$set": {"vectorstore_path": vectorstore_path}}
    vectorstore_meta_coll.update_one(query, update, upsert=True)
|
| 75 |
+
|
| 76 |
+
def get_vectorstore_metadata(user_id: str) -> Optional[Dict[str, Any]]:
    """Return the stored metadata document for this user_id, or None."""
    return vectorstore_meta_coll.find_one({"user_id": user_id})
|
| 81 |
+
|
| 82 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 83 |
+
# 5. Initialize (or Return) a MongoDBChatMessageHistory for chat_id
|
| 84 |
+
# ────────────────────────────────���─────────────────────────────────────────────
|
| 85 |
+
def initialize_chat_history(chat_id: str) -> MongoDBChatMessageHistory:
    """
    Build a Mongo-backed chat message history for the given chat session id.
    """
    return MongoDBChatMessageHistory(
        session_id=chat_id,
        connection_string=settings.mongo_uri,
        database_name=settings.mongo_chat_db,
        collection_name=chat_collection_name,
    )
|
| 95 |
+
|
| 96 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 97 |
+
# 6. Build a ConversationalRetrievalChain (RAG Chain) for user_id + chat_id
|
| 98 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 99 |
+
def build_rag_chain(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
    """
    Assemble a ConversationalRetrievalChain (RAG chain) for user_id + chat_id.

    - Loads the FAISS index for user_id.
    - Creates a retriever (k=5).
    - Wraps MongoDBChatMessageHistory in a ConversationBufferMemory.
    - Attaches the ChatGroq LLM + user_prompt.

    Raises:
        HTTPException: 404 when no vectorstore exists for this user.
    """
    # 1. Load FAISS index (or 404 if not found)
    try:
        faiss_vs = build_or_load_vectorstore(user_id)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="Vectorstore not found for this user. Call /rag/ingest first.")

    retriever = faiss_vs.as_retriever(search_kwargs={"k": 5})

    # 2. Instantiate a MongoDB-based chat history
    chat_history = initialize_chat_history(chat_id)

    # 3. Wrap the history in a ConversationBufferMemory.
    #    BUGFIX: the backing store must be passed as `chat_memory` — there is
    #    no `chat_history` field on ConversationBufferMemory, so the previous
    #    keyword was silently dropped and the Mongo history was never used.
    #    `return_messages=True` so the chat-style prompt receives message
    #    objects rather than a flattened string.
    memory = ConversationBufferMemory(
        memory_key="chat_history",   # how the chain references stored messages
        chat_memory=chat_history,    # use the MongoDB store as the backend
        return_messages=True,
    )

    # 4. Get the LLM
    llm = get_llm()

    # 5. Build the ConversationalRetrievalChain with the wrapped memory
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        return_source_documents=False,
        chain_type="stuff",
        combine_docs_chain_kwargs={"prompt": user_prompt},
        verbose=False,
    )
    return chain
|
app/run_server.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Server runner script for the PageSpeed Insights API.
|
| 3 |
+
"""
|
| 4 |
+
import uvicorn
|
| 5 |
+
from app.config import settings
|
| 6 |
+
|
| 7 |
+
if __name__ == "__main__":
    # Announce where the server and its documentation will be reachable.
    print(f"🚀 Starting {settings.app_name}")
    print(f"📍 Server: {settings.host}:{settings.port}")
    print(f"🔧 Debug Mode: {settings.debug}")
    print(f"📚 API Documentation: http://{settings.host}:{settings.port}/docs")
    print(f"📋 Alternative Docs: http://{settings.host}:{settings.port}/redoc")

    # Hand off to uvicorn; auto-reload and verbose logging track debug mode.
    uvicorn.run(
        "app.main:app",
        host=settings.host,
        port=settings.port,
        reload=settings.debug,
        log_level="debug" if settings.debug else "info",
    )
|
app/services.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Business logic services for PageSpeed analysis.
|
| 3 |
+
"""
|
| 4 |
+
import json
|
| 5 |
+
import requests
|
| 6 |
+
import logging
|
| 7 |
+
import google.generativeai as genai
|
| 8 |
+
from typing import Dict, Any
|
| 9 |
+
from app.config import settings
|
| 10 |
+
|
| 11 |
+
# Create a module-level logger
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class PageSpeedService:
|
| 16 |
+
"""Service class for PageSpeed Insights operations."""
|
| 17 |
+
|
| 18 |
+
def __init__(self):
|
| 19 |
+
self.pagespeed_api_key = settings.pagespeed_api_key
|
| 20 |
+
self.gemini_api_key = settings.gemini_api_key
|
| 21 |
+
|
| 22 |
+
if self.gemini_api_key:
|
| 23 |
+
logger.info("Configuring Gemini AI with provided API key.")
|
| 24 |
+
genai.configure(api_key=self.gemini_api_key)
|
| 25 |
+
else:
|
| 26 |
+
logger.warning("No Gemini API key found. Gemini reporting will fail if called.")
|
| 27 |
+
|
| 28 |
+
def get_pagespeed_data(self, target_url: str) -> Dict[Any, Any]:
|
| 29 |
+
"""
|
| 30 |
+
Fetch data from the PageSpeed Insights API for the given URL.
|
| 31 |
+
|
| 32 |
+
Args:
|
| 33 |
+
target_url (str): The URL to analyze
|
| 34 |
+
|
| 35 |
+
Returns:
|
| 36 |
+
Dict[Any, Any]: PageSpeed Insights data
|
| 37 |
+
|
| 38 |
+
Raises:
|
| 39 |
+
Exception: If API request fails
|
| 40 |
+
"""
|
| 41 |
+
logger.info("Starting PageSpeed fetch for URL: %s", target_url)
|
| 42 |
+
if not self.pagespeed_api_key:
|
| 43 |
+
msg = "PageSpeed API key not configured"
|
| 44 |
+
logger.error(msg)
|
| 45 |
+
raise Exception(msg)
|
| 46 |
+
|
| 47 |
+
endpoint = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
|
| 48 |
+
params = {
|
| 49 |
+
"url": target_url,
|
| 50 |
+
"key": self.pagespeed_api_key
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
try:
|
| 54 |
+
response = requests.get(endpoint, params=params, timeout=60)
|
| 55 |
+
response.raise_for_status()
|
| 56 |
+
logger.info("Successfully fetched PageSpeed data for %s (status %s)", target_url, response.status_code)
|
| 57 |
+
return response.json()
|
| 58 |
+
except requests.exceptions.HTTPError as http_err:
|
| 59 |
+
msg = f"HTTP error fetching PageSpeed data: {http_err}"
|
| 60 |
+
logger.error(msg, exc_info=True)
|
| 61 |
+
raise Exception(msg)
|
| 62 |
+
except requests.exceptions.RequestException as req_err:
|
| 63 |
+
msg = f"Request exception fetching PageSpeed data: {req_err}"
|
| 64 |
+
logger.error(msg, exc_info=True)
|
| 65 |
+
raise Exception(msg)
|
| 66 |
+
except Exception as e:
|
| 67 |
+
msg = f"Unexpected error in get_pagespeed_data: {e}"
|
| 68 |
+
logger.error(msg, exc_info=True)
|
| 69 |
+
raise Exception(msg)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def generate_report_with_gemini(self, pagespeed_data: Dict[Any, Any]) -> str:
|
| 73 |
+
"""
|
| 74 |
+
Uses the Gemini model to generate a detailed report based on the PageSpeed Insights data,
|
| 75 |
+
employing an advanced prompt for specialized analysis and recommendations.
|
| 76 |
+
|
| 77 |
+
Args:
|
| 78 |
+
pagespeed_data (Dict[Any, Any]): PageSpeed Insights data
|
| 79 |
+
|
| 80 |
+
Returns:
|
| 81 |
+
str: Generated performance optimization report
|
| 82 |
+
|
| 83 |
+
Raises:
|
| 84 |
+
Exception: If report generation fails
|
| 85 |
+
"""
|
| 86 |
+
logger.info("Starting Gemini report generation.")
|
| 87 |
+
if not self.gemini_api_key:
|
| 88 |
+
msg = "Gemini API key not configured"
|
| 89 |
+
logger.error(msg)
|
| 90 |
+
raise Exception(msg)
|
| 91 |
+
|
| 92 |
+
try:
|
| 93 |
+
# Select a Gemini model
|
| 94 |
+
model = genai.GenerativeModel("gemini-2.0-flash")
|
| 95 |
+
prompt = self._create_analysis_prompt(pagespeed_data)
|
| 96 |
+
logger.debug("Generated Gemini prompt: %s", prompt[:200] + "…")
|
| 97 |
+
|
| 98 |
+
response = model.generate_content(prompt)
|
| 99 |
+
|
| 100 |
+
if response and hasattr(response, "text") and response.text:
|
| 101 |
+
logger.info("Gemini report generated successfully.")
|
| 102 |
+
return response.text
|
| 103 |
+
elif response and response.candidates and response.candidates[0].finish_reason == "SAFETY":
|
| 104 |
+
msg = "Report generation was blocked due to safety settings"
|
| 105 |
+
logger.error(msg)
|
| 106 |
+
raise Exception(msg)
|
| 107 |
+
else:
|
| 108 |
+
msg = "No report could be generated or the response was empty"
|
| 109 |
+
logger.error(msg)
|
| 110 |
+
raise Exception(msg)
|
| 111 |
+
|
| 112 |
+
except Exception as e:
|
| 113 |
+
msg = f"Error generating report with Gemini: {e}"
|
| 114 |
+
logger.error(msg, exc_info=True)
|
| 115 |
+
raise Exception(msg)
|
| 116 |
+
|
| 117 |
+
def _create_analysis_prompt(self, pagespeed_data: Dict[Any, Any]) -> str:
    """
    Create the specialized prompt for Gemini analysis.

    The prompt instructs the model to act as a web-performance analyst and
    embeds the full PageSpeed Insights JSON (pretty-printed) inline.

    Args:
        pagespeed_data (Dict[Any, Any]): PageSpeed Insights data

    Returns:
        str: Formatted prompt for Gemini
    """
    # We do not log full JSON here to avoid huge payload in logs,
    # but we do log that prompt construction is happening.
    logger.debug("Building Gemini analysis prompt from PageSpeed data.")
    # NOTE: the literal below is a single implicitly-concatenated string;
    # do not reflow it — the markdown structure is part of the model contract.
    return (
        "**Role:** You are an **Expert Web Performance Optimization Analyst and Senior Full-Stack Engineer** "
        "with deep expertise in interpreting Google PageSpeed Insights data, diagnosing frontend and "
        "backend bottlenecks, and devising actionable, high-impact optimization strategies.\n\n"
        "**Objective:**\n"
        "Analyze the provided Google PageSpeed Insights JSON data for the analyzed website. "
        "Your primary goal is to generate a comprehensive, prioritized, and actionable set of strategies "
        "to significantly improve its performance. These strategies must directly address the specific "
        "metrics and audit findings within the report, aiming to elevate both Core Web Vitals "
        "(LCP, INP, CLS) and other key performance indicators (FCP, TTFB, TBT), and ultimately "
        "improve the `overall_category` to 'FAST' where possible.\n\n"
        "**Input Data:**\n"
        "The following JSON object contains the complete PageSpeed Insights report:\n"
        f"```json\n{json.dumps(pagespeed_data, indent=2)}\n```\n\n"
        "**Analysis and Strategy Formulation - Instructions:**\n\n"
        "1. **Executive Performance Summary:**\n"
        " * Begin with a concise overview of the website's current performance status based on the provided data.\n"
        " * Highlight the `overall_category` for both `loadingExperience` (specific URL) and `originLoadingExperience` (entire origin).\n"
        " * Pinpoint the current values and `category` (e.g., FAST, AVERAGE, SLOW) for each key metric:\n"
        " * `CUMULATIVE_LAYOUT_SHIFT_SCORE` (CLS)\n"
        " * `EXPERIMENTAL_TIME_TO_FIRST_BYTE` (TTFB)\n"
        " * `FIRST_CONTENTFUL_PAINT_MS` (FCP)\n"
        " * `INTERACTION_TO_NEXT_PAINT` (INP)\n"
        " * `LARGEST_CONTENTFUL_PAINT_MS` (LCP)\n"
        " * `total-blocking-time` (TBT) from Lighthouse.\n"
        " * Identify any significant `metricSavings` opportunities highlighted in the Lighthouse `audits`.\n\n"
        "2. **Deep-Dive into Bottlenecks & Audit Failures:**\n"
        " * Systematically go through the `loadingExperience`, `originLoadingExperience`, and `lighthouseResult` (especially the `audits` section).\n"
        " * For each underperforming metric or failed/suboptimal audit (e.g., Lighthouse scores less than 1, or `notApplicable` audits with clear improvement paths like `lcp-lazy-loaded`, `critical-request-chains`, `dom-size`, `non-composited-animations`), extract the relevant details, display values, and numeric values.\n\n"
        "3. **Develop Prioritized, Actionable Optimization Strategies:**\n"
        " For *each* identified performance issue or opportunity, provide the following:\n"
        " * **A. Issue & Evidence:** Clearly state the problem (e.g., \"High Total Blocking Time,\" \"Suboptimal Largest Contentful Paint due to unoptimized image,\" \"Excessive DOM Size,\" \"Render-blocking resources in critical request chain\"). Refer directly to the JSON data points and audit IDs that support this finding (e.g., `audits['total-blocking-time'].numericValue`, `audits['critical-request-chains'].details.longestChain`).\n"
        " * **B. Root Cause Analysis (Inferred):** Briefly explain the likely technical reasons behind the issue based on the data.\n"
        " * **C. Specific, Technical Recommendation(s):** Provide detailed, actionable steps a development team can take. Be specific.\n"
        " * **D. Targeted Metric Improvement:** Specify which primary and secondary metrics this strategy will positively impact (e.g., \"This will directly reduce LCP and improve FCP,\" or \"This will significantly lower TBT and improve INP.\").\n"
        " * **E. Priority Level:** Assign a priority (High, Medium, Low) based on:\n"
        " * Impact on Core Web Vitals.\n"
        " * Potential for overall score improvement (consider `metricSavings`).\n"
        " * Severity of the issue (e.g., 'SLOW' or 'AVERAGE' categories).\n"
        " * Estimated implementation effort (favor high-impact, low/medium-effort tasks for higher priority).\n"
        " * **F. Justification for Priority:** Briefly explain why this priority was assigned.\n\n"
        "4. **Strategic Grouping (Optional but Recommended):**\n"
        " If applicable, group recommendations by area (e.g., Asset Optimization, JavaScript Optimization, Server-Side Improvements, Rendering Path Optimization, CSS Enhancements).\n\n"
        "5. **Anticipated Overall Impact:**\n"
        " Conclude with a statement on the anticipated overall improvement in performance and user experience if the high and medium-priority recommendations are implemented.\n\n"
        "**Output Format:**\n"
        "Please structure your response clearly. Use headings, subheadings, and bullet points to enhance readability and actionability. For example:\n\n"
        "---\n"
        "## Executive Performance Summary\n"
        "* **Overall URL Loading Experience Category:** [e.g., AVERAGE]\n"
        "* **Overall Origin Loading Experience Category:** [e.g., AVERAGE]\n"
        "* **Key Metrics:**\n"
        " * LCP: [Value] ms ([Category])\n"
        " * INP: [Value] ms ([Category])\n"
        " * ...etc.\n\n"
        "---\n"
        "## Prioritized Optimization Strategies\n\n"
        "### High Priority\n"
        "**1. Issue & Evidence:** [e.g., High Total Blocking Time (TBT) of 1200 ms - `audits['total-blocking-time'].numericValue`]\n"
        " * **Root Cause Analysis:** [e.g., Long JavaScript tasks on the main thread during page load, likely from unoptimized third-party scripts or complex component rendering.]\n"
        " * **Specific, Technical Recommendation(s):**\n"
        " * [Action 1]\n"
        " * [Action 2]\n"
        " * **Targeted Metric Improvement:** [e.g., TBT, INP, FCP]\n"
        " * **Justification for Priority:** [e.g., Directly impacts interactivity (INP) and is a significant contributor to a poor lab score.]\n\n"
        "**(Continue with other High, Medium, and Low priority items)**\n"
        "---\n\n"
        "**Ensure your analysis is based *solely* on the provided JSON data and your expert interpretation of it. "
        "Avoid generic advice; all recommendations must be tied to specific findings within the report. "
        "Do not add anything irrelevant in the report. Do not write text in the starting of the report**"
    )
def analyze_url(self, url: str) -> Dict[str, Any]:
    """
    Perform the complete PageSpeed analysis pipeline for one URL.

    Fetches the raw PageSpeed Insights data, then asks Gemini for a report.
    Never raises: failures are folded into the returned result dictionary.

    Args:
        url (str): The URL to analyze

    Returns:
        Dict[str, Any]: Result envelope with keys "success", "url",
        "report", "pagespeed_data", and "error" (None on success).
    """
    # Start from the failure shape; fill in fields as each stage succeeds.
    outcome: Dict[str, Any] = {
        "success": False,
        "url": url,
        "report": None,
        "pagespeed_data": None,
        "error": None,
    }
    try:
        data = self.get_pagespeed_data(url)
        generated = self.generate_report_with_gemini(data)
    except Exception as exc:
        logger.error("Failed full analyze_url flow: %s", exc, exc_info=True)
        outcome["error"] = str(exc)
        return outcome

    outcome["success"] = True
    outcome["report"] = generated
    outcome["pagespeed_data"] = data
    return outcome
def generate_priority(self, report: str) -> Dict[str, Any]:
    """
    Bucket the recommendations of a Gemini performance report by priority.

    Asks Gemini to re-emit the report's suggestions as a JSON object keyed
    by "high", "medium", "low", and "unknown", then parses that object out
    of the raw model response.

    Args:
        report (str): The Gemini-generated performance report

    Returns:
        Dict[str, Any]: Mapping of priority level to a list of suggestions

    Raises:
        Exception: If the priority generation fails
    """
    logger.info("Generating prioritized suggestions from the Gemini report.")

    if not self.gemini_api_key:
        msg = "Gemini API key not configured"
        logger.error(msg)
        raise Exception(msg)

    try:
        model = genai.GenerativeModel("gemini-2.0-flash")

        prompt = (
            "You are an expert web performance analyst.\n"
            "Extract and organize the optimization recommendations from the following performance report\n"
            "into a JSON object with exactly these keys: \"high\", \"medium\", \"low\", and \"unknown\".\n"
            "Each key’s value should be a list of suggestion strings.\n\n"
            "Important:\n"
            "- Respond with *only* a valid JSON object.\n"
            "- Do NOT include any commentary or explanation outside the JSON.\n\n"
            "Performance Report:\n"
            "```\n"
            + report +
            "\n```"
        )

        response = model.generate_content(prompt)
        raw = (response.text or "").strip()
        logger.debug("Raw priority response: %s", raw[:500] + ("…" if len(raw) > 500 else ""))

        # The model may wrap the JSON in prose or code fences; keep only the
        # span between the first '{' and the last '}'.
        first_brace, last_brace = raw.find('{'), raw.rfind('}')
        if first_brace == -1 or last_brace == -1 or last_brace <= first_brace:
            raise ValueError("No JSON object found in Gemini response")

        json_str = raw[first_brace:last_brace + 1]
        logger.debug("Extracted JSON string: %s", json_str)

        suggestions = json.loads(json_str)
        if not isinstance(suggestions, dict):
            raise ValueError("Parsed JSON is not a dictionary")

        # Guarantee the full key set even if the model omitted a bucket.
        for bucket in ("high", "medium", "low", "unknown"):
            suggestions.setdefault(bucket, [])

        logger.info("Priority suggestions generated successfully.")
        return suggestions

    except json.JSONDecodeError as je:
        msg = f"Failed to parse JSON from Gemini response: {je}"
        logger.error(msg, exc_info=True)
        raise Exception(msg)
    except Exception as e:
        msg = f"Error generating priority suggestions: {e}"
        logger.error(msg, exc_info=True)
        raise
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.104.1
|
| 2 |
+
uvicorn==0.24.0
|
| 3 |
+
python-dotenv==1.0.0
|
| 4 |
+
requests==2.31.0
|
| 5 |
+
google-generativeai==0.3.2
|
| 6 |
+
pydantic==2.5.0
|
| 7 |
+
pydantic_settings
|
| 8 |
+
langchain_groq
|
| 9 |
+
langchain_community
|
| 10 |
+
faiss-cpu
|
| 11 |
+
pymongo
|
| 12 |
+
langchain-mongodb
|