first commit
Browse files- app/config.py +0 -57
- app/main.py +6 -137
- app/page_speed/__init__.py +0 -0
- app/page_speed/config.py +39 -0
- app/{models.py → page_speed/models.py} +11 -1
- app/page_speed/routes.py +68 -0
- app/{services.py → page_speed/services.py} +68 -97
- app/rag/chat_history.py +22 -1
- app/rag/db.py +1 -1
- app/rag/embeddings.py +0 -1
- app/rag/prompt_library.py +0 -2
- app/rag/routes.py +76 -84
- app/rag/schemas.py +11 -28
- app/rag/utils.py +2 -2
- app/seo/__init__.py +0 -0
- app/seo/models.py +5 -0
- app/seo/routes.py +14 -25
- app/seo/seo_service.py +81 -107
app/config.py
DELETED
|
@@ -1,57 +0,0 @@
|
|
| 1 |
-
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 2 |
-
|
| 3 |
-
class Settings(BaseSettings):
|
| 4 |
-
"""Application settings loaded from environment variables."""
|
| 5 |
-
|
| 6 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 7 |
-
# Google API Keys
|
| 8 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
-
pagespeed_api_key: str
|
| 10 |
-
gemini_api_key: str
|
| 11 |
-
|
| 12 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 13 |
-
# Chat & RAG Configuration
|
| 14 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 15 |
-
groq_api_key: str
|
| 16 |
-
vectorstore_base_path: str = "./vectorstores"
|
| 17 |
-
|
| 18 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
-
# Hugging Face Hub
|
| 20 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 21 |
-
huggingfacehub_api_token: str
|
| 22 |
-
|
| 23 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
-
# MongoDB Configuration (Local)
|
| 25 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 26 |
-
mongo_uri: str = "mongodb://localhost:27017"
|
| 27 |
-
mongo_chat_db: str = "Education_chatbot"
|
| 28 |
-
mongo_chat_collection: str = "chat_histories"
|
| 29 |
-
|
| 30 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 31 |
-
# FastAPI Server Configuration
|
| 32 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 33 |
-
host: str = "0.0.0.0"
|
| 34 |
-
port: int = 8000
|
| 35 |
-
debug: bool = False
|
| 36 |
-
|
| 37 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
-
# App Metadata (unchanged)
|
| 39 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 40 |
-
app_name: str = "PageSpeed Insights Report Generator"
|
| 41 |
-
app_version: str = "1.0.0"
|
| 42 |
-
app_description: str = (
|
| 43 |
-
"Professional API for generating PageSpeed Insights reports "
|
| 44 |
-
"using Google's APIs and Gemini AI"
|
| 45 |
-
)
|
| 46 |
-
|
| 47 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 48 |
-
# Tell Pydantic to load from .env and ignore extras
|
| 49 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
-
model_config = SettingsConfigDict(
|
| 51 |
-
env_file=".env",
|
| 52 |
-
env_file_encoding="utf-8",
|
| 53 |
-
extra="ignore",
|
| 54 |
-
)
|
| 55 |
-
|
| 56 |
-
# Single shared Settings instance
|
| 57 |
-
settings = Settings()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/main.py
CHANGED
|
@@ -5,24 +5,16 @@ import time
|
|
| 5 |
import logging
|
| 6 |
import json
|
| 7 |
from datetime import datetime
|
| 8 |
-
from fastapi import FastAPI
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
from fastapi.responses import JSONResponse
|
| 11 |
from contextlib import asynccontextmanager
|
| 12 |
|
| 13 |
-
from app.config import settings
|
| 14 |
-
from app.models import
|
| 15 |
-
PageSpeedRequest,
|
| 16 |
-
PageSpeedDataResponse,
|
| 17 |
-
ReportRequest,
|
| 18 |
-
ReportResponse,
|
| 19 |
-
HealthResponse,
|
| 20 |
-
PriorityRequest,
|
| 21 |
-
PriorityResponse
|
| 22 |
-
)
|
| 23 |
-
from app.services import PageSpeedService
|
| 24 |
from app.rag.routes import router as rag_router
|
| 25 |
from app.seo import routes as seo_routes
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
# ------------------------
|
|
@@ -67,6 +59,8 @@ app.include_router(rag_router)
|
|
| 67 |
|
| 68 |
app.include_router(seo_routes.router)
|
| 69 |
|
|
|
|
|
|
|
| 70 |
|
| 71 |
# Add CORS middleware
|
| 72 |
app.add_middleware(
|
|
@@ -77,12 +71,6 @@ app.add_middleware(
|
|
| 77 |
allow_headers=["*"],
|
| 78 |
)
|
| 79 |
|
| 80 |
-
# Dependency to get PageSpeed service
|
| 81 |
-
def get_pagespeed_service() -> PageSpeedService:
|
| 82 |
-
"""Dependency to get a new PageSpeedService instance."""
|
| 83 |
-
return PageSpeedService()
|
| 84 |
-
|
| 85 |
-
|
| 86 |
@app.get("/", response_model=dict)
|
| 87 |
async def root():
|
| 88 |
"""Root endpoint with API information."""
|
|
@@ -113,124 +101,6 @@ async def health_check():
|
|
| 113 |
)
|
| 114 |
|
| 115 |
|
| 116 |
-
@app.post("/pagespeed", response_model=PageSpeedDataResponse)
|
| 117 |
-
async def fetch_pagespeed(
|
| 118 |
-
request: PageSpeedRequest,
|
| 119 |
-
service: PageSpeedService = Depends(get_pagespeed_service)
|
| 120 |
-
):
|
| 121 |
-
"""
|
| 122 |
-
Fetch raw PageSpeed Insights data for a given URL.
|
| 123 |
-
|
| 124 |
-
Request body:
|
| 125 |
-
{
|
| 126 |
-
"url": "https://www.example.com"
|
| 127 |
-
}
|
| 128 |
-
|
| 129 |
-
Returns:
|
| 130 |
-
{
|
| 131 |
-
"success": true,
|
| 132 |
-
"url": "https://www.example.com",
|
| 133 |
-
"pagespeed_data": { ... },
|
| 134 |
-
"error": null
|
| 135 |
-
}
|
| 136 |
-
"""
|
| 137 |
-
url_str = str(request.url)
|
| 138 |
-
logger.info("Received POST /pagespeed for URL: %s", url_str)
|
| 139 |
-
|
| 140 |
-
try:
|
| 141 |
-
pagespeed_data = service.get_pagespeed_data(url_str)
|
| 142 |
-
logger.info("Returning PageSpeed data for %s", url_str)
|
| 143 |
-
return PageSpeedDataResponse(
|
| 144 |
-
success=True,
|
| 145 |
-
url=url_str,
|
| 146 |
-
pagespeed_data=pagespeed_data,
|
| 147 |
-
error=None
|
| 148 |
-
)
|
| 149 |
-
except Exception as e:
|
| 150 |
-
logger.error("Error in /pagespeed endpoint for URL %s: %s", url_str, e, exc_info=True)
|
| 151 |
-
return PageSpeedDataResponse(
|
| 152 |
-
success=False,
|
| 153 |
-
url=url_str,
|
| 154 |
-
pagespeed_data=None,
|
| 155 |
-
error=str(e)
|
| 156 |
-
)
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
@app.post("/generate-report", response_model=ReportResponse)
|
| 160 |
-
async def generate_report(
|
| 161 |
-
body: ReportRequest,
|
| 162 |
-
service: PageSpeedService = Depends(get_pagespeed_service)
|
| 163 |
-
):
|
| 164 |
-
"""
|
| 165 |
-
Generate a Gemini-based optimization report from previously-fetched PageSpeed JSON.
|
| 166 |
-
|
| 167 |
-
Request body:
|
| 168 |
-
{
|
| 169 |
-
"pagespeed_data": { β¦full PageSpeed JSONβ¦ }
|
| 170 |
-
}
|
| 171 |
-
|
| 172 |
-
Returns:
|
| 173 |
-
{
|
| 174 |
-
"success": true,
|
| 175 |
-
"report": "Gemini-generated analysisβ¦",
|
| 176 |
-
"error": null
|
| 177 |
-
}
|
| 178 |
-
"""
|
| 179 |
-
logger.info("Received POST /generate-report")
|
| 180 |
-
|
| 181 |
-
try:
|
| 182 |
-
pagespeed_data = body.pagespeed_data
|
| 183 |
-
logger.debug("PageSpeed JSON payload size: %d bytes", len(str(pagespeed_data)))
|
| 184 |
-
|
| 185 |
-
report_text = service.generate_report_with_gemini(pagespeed_data)
|
| 186 |
-
logger.info("Returning Gemini report.")
|
| 187 |
-
return ReportResponse(
|
| 188 |
-
success=True,
|
| 189 |
-
report=report_text,
|
| 190 |
-
error=None
|
| 191 |
-
)
|
| 192 |
-
except Exception as e:
|
| 193 |
-
logger.error("Error in /generate-report endpoint: %s", e, exc_info=True)
|
| 194 |
-
return ReportResponse(
|
| 195 |
-
success=False,
|
| 196 |
-
report=None,
|
| 197 |
-
error=str(e)
|
| 198 |
-
)
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
@app.post("/generate-priorities", response_model=PriorityResponse)
|
| 202 |
-
async def generate_priorities(
|
| 203 |
-
request: PriorityRequest,
|
| 204 |
-
service: PageSpeedService = Depends(get_pagespeed_service)
|
| 205 |
-
):
|
| 206 |
-
"""
|
| 207 |
-
Generate a prioritized list of performance improvements from a Gemini report.
|
| 208 |
-
|
| 209 |
-
Request body:
|
| 210 |
-
{
|
| 211 |
-
"report": "Full Gemini-generated performance report..."
|
| 212 |
-
}
|
| 213 |
-
|
| 214 |
-
Returns:
|
| 215 |
-
{
|
| 216 |
-
"success": true,
|
| 217 |
-
"priorities": {
|
| 218 |
-
"High": ["Optimize TBT by reducing JS execution", ...],
|
| 219 |
-
"Medium": [...],
|
| 220 |
-
"Low": [...]
|
| 221 |
-
},
|
| 222 |
-
"error": null
|
| 223 |
-
}
|
| 224 |
-
"""
|
| 225 |
-
logger.info("Received POST /generate-priorities")
|
| 226 |
-
try:
|
| 227 |
-
priorities = service.generate_priority(request.report)
|
| 228 |
-
return PriorityResponse(success=True, priorities=priorities)
|
| 229 |
-
except Exception as e:
|
| 230 |
-
logger.error("Error in /generate-priorities: %s", e, exc_info=True)
|
| 231 |
-
return PriorityResponse(success=False, priorities=None, error=str(e))
|
| 232 |
-
|
| 233 |
-
|
| 234 |
@app.exception_handler(404)
|
| 235 |
async def not_found_handler(request, exc):
|
| 236 |
"""Custom 404 handler."""
|
|
@@ -244,7 +114,6 @@ async def not_found_handler(request, exc):
|
|
| 244 |
}
|
| 245 |
)
|
| 246 |
|
| 247 |
-
|
| 248 |
@app.exception_handler(500)
|
| 249 |
async def internal_error_handler(request, exc):
|
| 250 |
"""Custom 500 handler."""
|
|
|
|
| 5 |
import logging
|
| 6 |
import json
|
| 7 |
from datetime import datetime
|
| 8 |
+
from fastapi import FastAPI
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
from fastapi.responses import JSONResponse
|
| 11 |
from contextlib import asynccontextmanager
|
| 12 |
|
| 13 |
+
from app.page_speed.config import settings
|
| 14 |
+
from app.page_speed.models import HealthResponse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
from app.rag.routes import router as rag_router
|
| 16 |
from app.seo import routes as seo_routes
|
| 17 |
+
from app.page_speed import routes as page_speed_routes
|
| 18 |
|
| 19 |
|
| 20 |
# ------------------------
|
|
|
|
| 59 |
|
| 60 |
app.include_router(seo_routes.router)
|
| 61 |
|
| 62 |
+
# Mount PageSpeed router
|
| 63 |
+
app.include_router(page_speed_routes.router)
|
| 64 |
|
| 65 |
# Add CORS middleware
|
| 66 |
app.add_middleware(
|
|
|
|
| 71 |
allow_headers=["*"],
|
| 72 |
)
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
@app.get("/", response_model=dict)
|
| 75 |
async def root():
|
| 76 |
"""Root endpoint with API information."""
|
|
|
|
| 101 |
)
|
| 102 |
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
@app.exception_handler(404)
|
| 105 |
async def not_found_handler(request, exc):
|
| 106 |
"""Custom 404 handler."""
|
|
|
|
| 114 |
}
|
| 115 |
)
|
| 116 |
|
|
|
|
| 117 |
@app.exception_handler(500)
|
| 118 |
async def internal_error_handler(request, exc):
|
| 119 |
"""Custom 500 handler."""
|
app/page_speed/__init__.py
ADDED
|
File without changes
|
app/page_speed/config.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
from urllib.parse import quote_plus


class Settings(BaseSettings):
    """Application settings loaded from environment variables / a .env file."""

    # Google API Keys
    pagespeed_api_key: str
    gemini_api_key: str

    # Chat & RAG Configuration
    groq_api_key: str
    vectorstore_base_path: str = "./vectorstores"

    # Hugging Face
    huggingfacehub_api_token: str

    # MongoDB Config
    mongo_password: str
    # NOTE(review): the Atlas username and cluster host were previously
    # hard-coded inside the URI string. They are now configurable fields,
    # with the old values kept as defaults so existing deployments are
    # unaffected — but they should be supplied via environment variables.
    mongo_user: str = "Hammad"
    mongo_host: str = "cluster0.oi9z5ig.mongodb.net"
    mongo_chat_db: str = "MAAS"
    mongo_chat_collection: str = "chat_histories"

    # FastAPI Server Config
    host: str = "0.0.0.0"
    port: int = 8000
    debug: bool = False

    @property
    def mongo_uri(self) -> str:
        """MongoDB Atlas URI, dynamically constructed from the fields above.

        Both the username and the password are URL-escaped so credentials
        containing special characters (e.g. '@', ':') do not break the URI.
        """
        user = quote_plus(self.mongo_user)
        pwd = quote_plus(self.mongo_password)
        return (
            f"mongodb+srv://{user}:{pwd}@{self.mongo_host}/"
            f"{self.mongo_chat_db}?retryWrites=true&w=majority"
        )

    # Load values from .env and silently ignore unknown keys
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )


# Global settings instance shared by the rest of the application
settings = Settings()
|
app/{models.py → page_speed/models.py}
RENAMED
|
@@ -109,4 +109,14 @@ class PriorityRequest(BaseModel):
|
|
| 109 |
class PriorityResponse(BaseModel):
|
| 110 |
success: bool
|
| 111 |
priorities: Optional[Dict[str, List[str]]] = None
|
| 112 |
-
error: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
class PriorityResponse(BaseModel):
|
| 110 |
success: bool
|
| 111 |
priorities: Optional[Dict[str, List[str]]] = None
|
| 112 |
+
error: Optional[str] = None
|
| 113 |
+
|
| 114 |
+
class AnalyzeRequest(BaseModel):
    """Request body for the one-stop /pagespeed/analyze-url endpoint."""
    url: HttpUrl


class AnalyzeResponse(BaseModel):
    """Combined analysis response: report text plus prioritized improvements."""
    success: bool
    url: HttpUrl
    # Optional[...] = None matches the convention used by the other response
    # models in this module (e.g. PriorityResponse), and the defaults mean
    # error responses need not spell out every field explicitly.
    report: Optional[str] = None
    # Same shape PriorityResponse uses for the priority buckets
    # (e.g. {"High": [...], "Medium": [...], "Low": [...]}).
    priorities: Optional[Dict[str, List[str]]] = None
    error: Optional[str] = None
|
app/page_speed/routes.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""PageSpeed Insights API routes for analyzing URLs and generating reports."""

import logging

from fastapi import APIRouter, Depends

from app.page_speed.models import AnalyzeRequest, AnalyzeResponse
from app.page_speed.services import PageSpeedService

router = APIRouter(prefix="/pagespeed", tags=["PageSpeed"])

# Let the application configure logging; a library module should not force
# its own level (the previous logger.setLevel(INFO) overrode app-wide config).
logger = logging.getLogger(__name__)


def get_pagespeed_service() -> PageSpeedService:
    """Dependency that provides a fresh PageSpeedService instance per request."""
    return PageSpeedService()


@router.post("/analyze-url", response_model=AnalyzeResponse)
async def analyze_url(
    request: AnalyzeRequest,
    service: PageSpeedService = Depends(get_pagespeed_service)
):
    """
    One-stop endpoint to fetch PageSpeed data, generate a report, and derive priorities.

    - Takes a single 'url' field in the body.
    - Returns a human-friendly report and priority lists.
    """
    url_str = str(request.url)
    logger.info("Received POST /analyze-url for URL: %s", url_str)

    try:
        # 1. Fetch raw PageSpeed Insights data
        pagespeed_data = service.get_pagespeed_data(url_str)
        logger.debug("Fetched PageSpeed data (bytes=%d)", len(str(pagespeed_data)))

        # 2. Generate text report via Gemini
        report_text = service.generate_report_with_gemini(pagespeed_data)
        logger.debug("Generated report text (chars=%d)", len(report_text))

        # 3. Produce prioritized improvements
        priorities = service.generate_priority(report_text)
        logger.info("Analysis complete for %s", url_str)

        return AnalyzeResponse(
            success=True,
            url=url_str,
            report=report_text,
            priorities=priorities,
            error=None
        )
    except Exception as e:
        # Return a structured failure payload instead of raising, so clients
        # always receive an AnalyzeResponse-shaped body.
        logger.error("Error in /analyze-url: %s", e, exc_info=True)
        return AnalyzeResponse(
            success=False,
            url=url_str,
            report=None,
            priorities=None,
            error=str(e)
        )
|
app/{services.py → page_speed/services.py}
RENAMED
|
@@ -6,7 +6,7 @@ import requests
|
|
| 6 |
import logging
|
| 7 |
import google.generativeai as genai
|
| 8 |
from typing import Dict, Any
|
| 9 |
-
from app.config import settings
|
| 10 |
|
| 11 |
# Create a module-level logger
|
| 12 |
logger = logging.getLogger(__name__)
|
|
@@ -116,106 +116,78 @@ class PageSpeedService:
|
|
| 116 |
|
| 117 |
def _create_analysis_prompt(self, pagespeed_data: Dict[Any, Any]) -> str:
|
| 118 |
"""
|
| 119 |
-
Create the specialized prompt for Gemini analysis.
|
| 120 |
|
| 121 |
Args:
|
| 122 |
pagespeed_data (Dict[Any, Any]): PageSpeed Insights data
|
| 123 |
|
| 124 |
Returns:
|
| 125 |
-
str:
|
| 126 |
"""
|
| 127 |
logger.debug("Building Gemini analysis prompt from PageSpeed data.")
|
| 128 |
return f"""
|
|
|
|
| 129 |
You are an **Expert Web Performance Optimization Consultant**. The following JSON `{pagespeed_data}` contains exactly these keys (all required):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
-
```
|
| 132 |
-
{{
|
| 133 |
-
"url": string, // analyzed page URL
|
| 134 |
-
"origin": string, // origin domain
|
| 135 |
-
"loading_experience": {{ // Chrome UX data for URL
|
| 136 |
-
"overall_category": "FAST"|"AVERAGE"|"SLOW",
|
| 137 |
-
"metrics": {{
|
| 138 |
-
"CLS": {{ "percentile": number, "category": string }},
|
| 139 |
-
"TTFB": {{ "percentile": number, "category": string }},
|
| 140 |
-
"FCP": {{ "percentile": number, "category": string }},
|
| 141 |
-
"INP": {{ "percentile": number, "category": string }}
|
| 142 |
-
}}
|
| 143 |
-
}},
|
| 144 |
-
"origin_loading_experience": {{ // Chrome UX data for origin
|
| 145 |
-
"overall_category": "FAST"|"AVERAGE"|"SLOW"
|
| 146 |
-
}},
|
| 147 |
-
"lighthouse_audits": [ // only audits with score <1 or notApplicable
|
| 148 |
-
{{
|
| 149 |
-
"id": string, // audit identifier
|
| 150 |
-
"numeric_value": number, // ms or unit value
|
| 151 |
-
"score": number|null, // 0β1 or null if N/A
|
| 152 |
-
"description": string, // audit title/description
|
| 153 |
-
"details": {{ // optional details for resource URLs
|
| 154 |
-
"items": [ {{ "url": string }} ]
|
| 155 |
-
}},
|
| 156 |
-
"metric_savings_ms"?: number // if available
|
| 157 |
-
}}
|
| 158 |
-
]
|
| 159 |
-
}}
|
| 160 |
-
```
|
| 161 |
-
|
| 162 |
-
Your job: output **exactly** the following JSON reportβno extra keys, no prose outside these structures:
|
| 163 |
-
|
| 164 |
-
```json
|
| 165 |
-
{{
|
| 166 |
-
"overall_score": integer,
|
| 167 |
-
"grade": "A"|"B"|"C"|"D"|"F",
|
| 168 |
-
"summary": {{
|
| 169 |
-
"CLS": {{ "value": number, "category": string }},
|
| 170 |
-
"TTFB": {{ "value": number, "category": string }},
|
| 171 |
-
"FCP": {{ "value": number, "category": string }},
|
| 172 |
-
"INP": {{ "value": number, "category": string }},
|
| 173 |
-
"LCP": {{ "value": number, "score": number }},
|
| 174 |
-
"TBT": {{ "value": number, "score": number }}
|
| 175 |
-
}},
|
| 176 |
-
"top_issues": [string],
|
| 177 |
-
"top_opportunities": [string],
|
| 178 |
-
"audits": [
|
| 179 |
-
{{
|
| 180 |
-
"id": string,
|
| 181 |
-
"value": number,
|
| 182 |
-
"score": number|null,
|
| 183 |
-
"resource_url"?: string, // first offending URL from details.items
|
| 184 |
-
"status": "critical"|"needs_improvement"|"good",
|
| 185 |
-
"recommendation": string,
|
| 186 |
-
"expected_gain_s": number
|
| 187 |
-
}}
|
| 188 |
-
],
|
| 189 |
-
"action_plan": [
|
| 190 |
-
{{
|
| 191 |
-
"id": string,
|
| 192 |
-
"fix": string,
|
| 193 |
-
"platform_tip"?: string, // e.g. Next.js `next/image` or WordPress-specific advice
|
| 194 |
-
"effort": "low"|"medium"|"high"
|
| 195 |
-
}}
|
| 196 |
-
],
|
| 197 |
-
"monitoring": {{
|
| 198 |
-
"frequency": string,
|
| 199 |
-
"methods": [string],
|
| 200 |
-
"ci_snippet"?: string // optional GitHub Action or Lighthouse CI config
|
| 201 |
-
}}
|
| 202 |
-
}}```
|
| 203 |
-
**Requirements:**
|
| 204 |
-
- **Strict Mapping:** Every field derives from `{{PSI_DATA}}` (use JSON paths like `lighthouseResult.audits[...].numeric_value`).
|
| 205 |
-
- **No Extra Text:** Only the JSON above.
|
| 206 |
-
- **Tie to JSON Paths:** Include resource URLs via `details.items[0].url`.
|
| 207 |
-
- **Exact Code Snippets:** Provide `<link rel="preload"...>` or `<script defer>` snippets.
|
| 208 |
-
- **Quantify Impact:** Use `metric_savings_ms` for each audit to calculate `expected_gain_s`.
|
| 209 |
-
- **Threshold Targets:** State target values, e.g. "Reduce LCP to β€1200β―ms".
|
| 210 |
-
- **PlatformβSpecific Tips:** If known, include stack advice, e.g. Next.js `next/image` or WordPress plugins.
|
| 211 |
-
- **Monitoring CI:** Optionally include a GitHub Action snippet:
|
| 212 |
-
```yaml
|
| 213 |
-
- uses: treosh/lighthouse-ci-action@v5
|
| 214 |
-
with:
|
| 215 |
-
configPath: .lighthouserc.json
|
| 216 |
-
```
|
| 217 |
-
- **Deterministic Scoring & Priority:** Same as before.
|
| 218 |
-
"""
|
| 219 |
|
| 220 |
|
| 221 |
def analyze_url(self, url: str) -> Dict[str, Any]:
|
|
@@ -295,16 +267,15 @@ Classification Rules:
|
|
| 295 |
2. **Measurable Target:** Include the numeric goal (e.g., "Reduce LCP to β€1200β―ms").
|
| 296 |
3. **Resource Context:** Embed the resource URL or file name when relevant.
|
| 297 |
4. **Expected Savings:** Append expected savings in seconds (from `metric_savings_ms`).
|
| 298 |
-
5. **
|
| 299 |
-
6. **
|
| 300 |
-
7. **
|
| 301 |
-
8. **
|
| 302 |
-
9. **Platform Tip:** If known, include stackβspecific advice (e.g., Next.js `next/image`).
|
| 303 |
-
10. **Priority Classification:**
|
| 304 |
- High: Savings β₯ 1.5 seconds or score < 0.25
|
| 305 |
- Medium: Savings between 0.5 and 1.49 seconds or score 0.25 to 0.50
|
| 306 |
- Low: Savings < 0.5 seconds or score between 0.51 and 1.0
|
| 307 |
- Unknown: No savings or score data available
|
|
|
|
| 308 |
|
| 309 |
Important:
|
| 310 |
- Respond with *only* a valid JSON object.
|
|
|
|
| 6 |
import logging
|
| 7 |
import google.generativeai as genai
|
| 8 |
from typing import Dict, Any
|
| 9 |
+
from app.page_speed.config import settings
|
| 10 |
|
| 11 |
# Create a module-level logger
|
| 12 |
logger = logging.getLogger(__name__)
|
|
|
|
| 116 |
|
| 117 |
def _create_analysis_prompt(self, pagespeed_data: Dict[Any, Any]) -> str:
|
| 118 |
"""
|
| 119 |
+
Create the specialized prompt for Gemini analysis in a human-readable format.
|
| 120 |
|
| 121 |
Args:
|
| 122 |
pagespeed_data (Dict[Any, Any]): PageSpeed Insights data
|
| 123 |
|
| 124 |
Returns:
|
| 125 |
+
str: Human-readable, user-friendly report prompt
|
| 126 |
"""
|
| 127 |
logger.debug("Building Gemini analysis prompt from PageSpeed data.")
|
| 128 |
return f"""
|
| 129 |
+
<<<<<<< HEAD:app/services.py
|
| 130 |
You are an **Expert Web Performance Optimization Consultant**. The following JSON `{pagespeed_data}` contains exactly these keys (all required):
|
| 131 |
+
=======
|
| 132 |
+
You are an **Expert Web Performance Optimization Consultant**. The following JSON `{{pagespeed_data}}` includes detailed website performance metrics from Google PageSpeed Insights.
|
| 133 |
+
>>>>>>> 574c6ac (Update endpoints):app/page_speed/services.py
|
| 134 |
+
|
| 135 |
+
Your task is to analyze this data and generate a human-friendly performance **report in plain English**. The report will be read by a **non-technical business owner**, so keep it understandable while explaining technical concepts briefly when necessary.
|
| 136 |
+
|
| 137 |
+
### Format of Your Response:
|
| 138 |
+
Respond with a **natural language summary (not JSON)**. It should read like a report, not like code or technical output.
|
| 139 |
+
|
| 140 |
+
---
|
| 141 |
+
|
| 142 |
+
### Your report must include the following sections:
|
| 143 |
+
|
| 144 |
+
1. **Overall Performance Summary**
|
| 145 |
+
- Explain how fast the website feels to users.
|
| 146 |
+
- Mention the overall category (FAST, AVERAGE, SLOW) and what that means.
|
| 147 |
+
- If origin data differs from page data, point it out.
|
| 148 |
+
|
| 149 |
+
2. **Key Metrics Breakdown**
|
| 150 |
+
- For each metric (`CLS`, `TTFB`, `FCP`, `INP`, `LCP`, `TBT`):
|
| 151 |
+
- Provide the value and performance category (e.g., "good", "needs improvement").
|
| 152 |
+
- Briefly explain what the metric means and how it impacts the user experience.
|
| 153 |
+
- Use simple analogies if possible. (Example: βCLS measures layout shift β like if buttons jump around while loading.β)
|
| 154 |
+
|
| 155 |
+
3. **Top Issues**
|
| 156 |
+
- List and explain the top 3β5 performance problems in plain language.
|
| 157 |
+
- Avoid jargon. Example: βToo many large images are slowing down the page.β
|
| 158 |
+
|
| 159 |
+
4. **Improvement Opportunities**
|
| 160 |
+
- Suggest high-impact actions to improve speed (e.g., compress images, lazy load below-the-fold content).
|
| 161 |
+
- Prioritize based on effort (low/medium/high) and expected time savings.
|
| 162 |
+
- Mention technical fixes where helpful, but **always** explain what they do and **why they help**.
|
| 163 |
+
|
| 164 |
+
5. **Detailed Audit Notes**
|
| 165 |
+
- Mention any specific URLs or files causing problems (e.g., slow scripts, unoptimized images).
|
| 166 |
+
- For each, explain the issue and estimated time it adds to loading.
|
| 167 |
+
- Be clear and concise.
|
| 168 |
+
|
| 169 |
+
6. **Recommended Action Plan**
|
| 170 |
+
- Provide a to-do list of concrete fixes with estimated effort levels.
|
| 171 |
+
- If possible, include tips tailored to platforms (e.g., for WordPress or Next.js).
|
| 172 |
+
|
| 173 |
+
7. **Ongoing Monitoring Advice**
|
| 174 |
+
- Recommend how often they should check performance.
|
| 175 |
+
---
|
| 176 |
+
|
| 177 |
+
### Important:
|
| 178 |
+
- Do **not** output JSON or code blocks unless specifically required.
|
| 179 |
+
- Use a tone that's **professional, helpful, and non-technical**.
|
| 180 |
+
- Help the reader understand what needs fixing and why it matters for their website and users.
|
| 181 |
+
|
| 182 |
+
Example phrasing:
|
| 183 |
+
> "Your site currently loads in about 3.2 seconds for most users, which is considered average. Improving this can reduce bounce rates and improve conversions."
|
| 184 |
+
|
| 185 |
+
Be specific and practical. Use values directly from `{{pagespeed_data}}` such as `numeric_value`, `percentile`, and `category` fields.
|
| 186 |
+
|
| 187 |
+
### PageSpeed Data:
|
| 188 |
+
{json.dumps(pagespeed_data, indent=2)}
|
| 189 |
+
"""
|
| 190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
|
| 193 |
def analyze_url(self, url: str) -> Dict[str, Any]:
|
|
|
|
| 267 |
2. **Measurable Target:** Include the numeric goal (e.g., "Reduce LCP to β€1200β―ms").
|
| 268 |
3. **Resource Context:** Embed the resource URL or file name when relevant.
|
| 269 |
4. **Expected Savings:** Append expected savings in seconds (from `metric_savings_ms`).
|
| 270 |
+
5. **Code Snippet:** Provide a readyβtoβcopy snippet if applicable (e.g., `<img loading="lazy" src=...>`).
|
| 271 |
+
6. **Category Tag:** Prefix with optimization domain `[Image]`, `[CSS]`, `[JS]`, `[Server]`.
|
| 272 |
+
7. **Platform Tip:** If known, include stackβspecific advice (e.g., Next.js `next/image`).
|
| 273 |
+
8. **Priority Classification:**
|
|
|
|
|
|
|
| 274 |
- High: Savings β₯ 1.5 seconds or score < 0.25
|
| 275 |
- Medium: Savings between 0.5 and 1.49 seconds or score 0.25 to 0.50
|
| 276 |
- Low: Savings < 0.5 seconds or score between 0.51 and 1.0
|
| 277 |
- Unknown: No savings or score data available
|
| 278 |
+
9. Explain in easy english, avoiding technical jargon and explaination for technical terms.
|
| 279 |
|
| 280 |
Important:
|
| 281 |
- Respond with *only* a valid JSON object.
|
app/rag/chat_history.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
|
|
| 1 |
import time
|
| 2 |
from typing import List, Dict, Any
|
| 3 |
from pymongo import ReturnDocument
|
| 4 |
|
| 5 |
-
from app.config import settings
|
| 6 |
from .db import mongo_client, chat_collection_name
|
| 7 |
from .embeddings import get_llm
|
|
|
|
| 8 |
from langchain.prompts import ChatPromptTemplate
|
| 9 |
from .logging_config import logger
|
| 10 |
|
|
@@ -78,3 +80,22 @@ class ChatHistoryManager:
|
|
| 78 |
)
|
| 79 |
logger.info("Summarized chat %s down to one message", chat_id)
|
| 80 |
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
import time
|
| 3 |
from typing import List, Dict, Any
|
| 4 |
from pymongo import ReturnDocument
|
| 5 |
|
| 6 |
+
from app.page_speed.config import settings
|
| 7 |
from .db import mongo_client, chat_collection_name
|
| 8 |
from .embeddings import get_llm
|
| 9 |
+
from .utils import get_vectorstore_path # make sure this util is available
|
| 10 |
from langchain.prompts import ChatPromptTemplate
|
| 11 |
from .logging_config import logger
|
| 12 |
|
|
|
|
| 80 |
)
|
| 81 |
logger.info("Summarized chat %s down to one message", chat_id)
|
| 82 |
return True
|
| 83 |
+
|
| 84 |
+
    @staticmethod
    def vectorstore_exists(user_id: str) -> bool:
        """
        Check if a vectorstore directory already exists for this user.

        Args:
            user_id: Identifier used by get_vectorstore_path to derive the
                per-user vectorstore directory.

        Returns:
            bool: True if the resolved path exists and is a directory.
        """
        # Path layout is delegated to the shared util so routes and this
        # manager agree on where vectorstores live.
        path = get_vectorstore_path(user_id)
        exists = os.path.isdir(path)
        logger.debug("Vectorstore path %s exists: %s", path, exists)
        return exists

    @staticmethod
    def chat_exists(chat_id: str) -> bool:
        """
        Check if a chat session already exists in Mongo for this chat_id.

        Args:
            chat_id: Session identifier stored under the "session_id" field.

        Returns:
            bool: True if at least one document matches.
        """
        # limit=1 makes this an existence probe rather than a full count.
        # NOTE(review): `coll` is not among this module's visible imports
        # (only mongo_client and chat_collection_name come from .db) —
        # confirm it is a module-level collection handle, otherwise this
        # raises NameError at call time.
        found = coll.count_documents({"session_id": chat_id}, limit=1) > 0
        logger.debug("Chat session %s exists: %s", chat_id, found)
        return found
|
app/rag/db.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from pymongo import MongoClient
|
| 2 |
-
from app.config import settings
|
| 3 |
|
| 4 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 5 |
# MongoDB Initialization
|
|
|
|
| 1 |
from pymongo import MongoClient
|
| 2 |
+
from app.page_speed.config import settings
|
| 3 |
|
| 4 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 5 |
# MongoDB Initialization
|
app/rag/embeddings.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
| 3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 4 |
-
from langchain.prompts import ChatPromptTemplate
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
|
| 7 |
load_dotenv() # now os.getenv(...) will pick up values from your .env file
|
|
|
|
| 1 |
import os
|
| 2 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
| 3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
|
| 4 |
from dotenv import load_dotenv
|
| 5 |
|
| 6 |
load_dotenv() # now os.getenv(...) will pick up values from your .env file
|
app/rag/prompt_library.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
from langchain.prompts import ChatPromptTemplate
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 6 |
# 1. Prompt Template for PAGE Speed Insights RAG Chatbot
|
| 7 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 1 |
from langchain.prompts import ChatPromptTemplate
|
| 2 |
|
|
|
|
|
|
|
| 3 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 4 |
# 1. Prompt Template for PAGE Speed Insights RAG Chatbot
|
| 5 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
app/rag/routes.py
CHANGED
|
@@ -1,125 +1,117 @@
|
|
| 1 |
import os
|
| 2 |
import uuid
|
| 3 |
from fastapi import APIRouter, HTTPException
|
| 4 |
-
from typing import Optional
|
| 5 |
|
| 6 |
-
from .schemas import
|
| 7 |
-
IngestRequest,
|
| 8 |
-
IngestResponse,
|
| 9 |
-
CreateChatResponse,
|
| 10 |
-
ChatRequest,
|
| 11 |
-
ChatResponse
|
| 12 |
-
)
|
| 13 |
from .utils import (
|
| 14 |
text_splitter,
|
| 15 |
embeddings,
|
| 16 |
-
get_vectorstore_path,
|
| 17 |
save_vectorstore_to_disk,
|
| 18 |
upsert_vectorstore_metadata,
|
| 19 |
-
|
| 20 |
-
build_rag_chain
|
| 21 |
-
initialize_chat_history
|
| 22 |
)
|
| 23 |
-
from .logging_config import logger
|
| 24 |
-
|
| 25 |
from .chat_history import ChatHistoryManager
|
| 26 |
from .logging_config import logger
|
| 27 |
|
| 28 |
router = APIRouter(prefix="/rag", tags=["rag"])
|
| 29 |
|
| 30 |
-
@router.post("/
|
| 31 |
-
async def
|
| 32 |
"""
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
2. Split into chunks using RecursiveCharacterTextSplitter.
|
| 37 |
-
3. Create a FAISS vectorstore from those chunks.
|
| 38 |
-
4. Save the vectorstore to disk under ./vectorstores/{user_id}/faiss_index.
|
| 39 |
-
5. Upsert metadata in Mongo (user_id -> vectorstore_path).
|
| 40 |
"""
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
all_text = "\n\n".join(body.documents)
|
| 45 |
-
|
| 46 |
-
# 2. Split into chunks
|
| 47 |
text_chunks = text_splitter.split_text(all_text)
|
| 48 |
-
logger.info("Split into %d chunks", len(text_chunks))
|
| 49 |
-
|
| 50 |
-
# 3. Build FAISS vectorstore
|
| 51 |
from langchain.vectorstores import FAISS as _FAISS
|
| 52 |
vs = _FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
logger.info(
|
| 57 |
-
|
| 58 |
-
# 5. Upsert metadata
|
| 59 |
-
upsert_vectorstore_metadata(user_id, faiss_path)
|
| 60 |
-
logger.info("Upserted vectorstore metadata for user_id=%s", user_id)
|
| 61 |
-
|
| 62 |
-
return IngestResponse(
|
| 63 |
-
success=True,
|
| 64 |
-
message="Vectorstore created successfully.",
|
| 65 |
-
user_id=user_id,
|
| 66 |
-
vectorstore_path=faiss_path
|
| 67 |
)
|
| 68 |
-
except Exception as e:
|
| 69 |
-
logger.error("Error during ingestion for user_id=%s: %s", user_id, e, exc_info=True)
|
| 70 |
-
raise HTTPException(status_code=500, detail=f"Ingestion failed: {e}")
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
"""
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
- Return the chat_id so the client can use it in subsequent calls.
|
| 79 |
"""
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
return CreateChatResponse(
|
| 89 |
-
success=True,
|
| 90 |
-
message="Chat session created.",
|
| 91 |
-
user_id=user_id,
|
| 92 |
-
chat_id=chat_id
|
| 93 |
)
|
| 94 |
-
except Exception as e:
|
| 95 |
-
logger.error("Error creating chat for user_id=%s: %s", user_id, e, exc_info=True)
|
| 96 |
-
raise HTTPException(status_code=500, detail=f"Failed to create chat session: {e}")
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
@router.post("/chat/{user_id}/{chat_id}", response_model=ChatResponse)
|
| 100 |
-
async def chat_with_user(user_id: str, chat_id: str, prompt_type:str, body: ChatRequest):
|
| 101 |
question = body.question.strip()
|
| 102 |
-
logger.info("Chat request
|
| 103 |
|
| 104 |
try:
|
| 105 |
-
#
|
| 106 |
-
ChatHistoryManager.create_session(chat_id)
|
| 107 |
-
|
| 108 |
-
# 2) Summarize long histories
|
| 109 |
ChatHistoryManager.summarize_if_needed(chat_id, threshold=10)
|
| 110 |
|
| 111 |
-
#
|
| 112 |
ChatHistoryManager.add_message(chat_id, role="human", content=question)
|
| 113 |
|
| 114 |
-
#
|
| 115 |
-
chain = build_rag_chain(
|
| 116 |
history = ChatHistoryManager.get_messages(chat_id)
|
| 117 |
result = chain.invoke({"question": question, "chat_history": history})
|
| 118 |
answer = result.get("answer") or result.get("output_text")
|
| 119 |
if not answer:
|
| 120 |
raise Exception("No answer returned from chain")
|
| 121 |
|
| 122 |
-
#
|
| 123 |
ChatHistoryManager.add_message(chat_id, role="ai", content=answer)
|
| 124 |
|
| 125 |
return ChatResponse(
|
|
@@ -127,17 +119,17 @@ async def chat_with_user(user_id: str, chat_id: str, prompt_type:str, body: Chat
|
|
| 127 |
answer=answer,
|
| 128 |
error=None,
|
| 129 |
chat_id=chat_id,
|
| 130 |
-
|
| 131 |
)
|
| 132 |
|
| 133 |
except HTTPException:
|
| 134 |
raise
|
| 135 |
except Exception as e:
|
| 136 |
-
logger.error("Error chatting
|
| 137 |
return ChatResponse(
|
| 138 |
success=False,
|
| 139 |
answer=None,
|
| 140 |
error=str(e),
|
| 141 |
chat_id=chat_id,
|
| 142 |
-
|
| 143 |
)
|
|
|
|
| 1 |
import os
|
| 2 |
import uuid
|
| 3 |
from fastapi import APIRouter, HTTPException
|
|
|
|
| 4 |
|
| 5 |
+
from .schemas import SetupRequest, ChatRequest, SetupResponse, ChatResponse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from .utils import (
|
| 7 |
text_splitter,
|
| 8 |
embeddings,
|
|
|
|
| 9 |
save_vectorstore_to_disk,
|
| 10 |
upsert_vectorstore_metadata,
|
| 11 |
+
get_vectorstore_path,
|
| 12 |
+
build_rag_chain
|
|
|
|
| 13 |
)
|
|
|
|
|
|
|
| 14 |
from .chat_history import ChatHistoryManager
|
| 15 |
from .logging_config import logger
|
| 16 |
|
| 17 |
router = APIRouter(prefix="/rag", tags=["rag"])
|
| 18 |
|
| 19 |
+
@router.post("/initialization/{onboarding_id}", response_model=SetupResponse)
|
| 20 |
+
async def setup_rag_session(onboarding_id: str, body: SetupRequest):
|
| 21 |
"""
|
| 22 |
+
Single endpoint to ingest documents and create a chat session.
|
| 23 |
+
- If vectorstore exists for user_id, skip ingestion.
|
| 24 |
+
- Always create a new chat_id for this session.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"""
|
| 26 |
+
# 1. Handle vectorstore existence
|
| 27 |
+
vectorstore_path = get_vectorstore_path(onboarding_id)
|
| 28 |
+
if os.path.isdir(vectorstore_path):
|
| 29 |
+
logger.info(
|
| 30 |
+
"Vectorstore exists for onboarding_id=%s at %s; skipping ingestion",
|
| 31 |
+
onboarding_id, vectorstore_path
|
| 32 |
+
)
|
| 33 |
+
vs_path = vectorstore_path
|
| 34 |
+
else:
|
| 35 |
+
if not body.documents:
|
| 36 |
+
logger.error(
|
| 37 |
+
"Vectorstore missing for onboarding_id=%s and no documents provided", onboarding_id
|
| 38 |
+
)
|
| 39 |
+
raise HTTPException(
|
| 40 |
+
status_code=400,
|
| 41 |
+
detail="Vectorstore does not exist; please provide documents to ingest."
|
| 42 |
+
)
|
| 43 |
+
# Ingest new vectorstore
|
| 44 |
all_text = "\n\n".join(body.documents)
|
|
|
|
|
|
|
| 45 |
text_chunks = text_splitter.split_text(all_text)
|
| 46 |
+
logger.info("Split into %d chunks for ingestion", len(text_chunks))
|
|
|
|
|
|
|
| 47 |
from langchain.vectorstores import FAISS as _FAISS
|
| 48 |
vs = _FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
| 49 |
+
vs_path = save_vectorstore_to_disk(vs, onboarding_id)
|
| 50 |
+
logger.info("Saved FAISS index to %s", vs_path)
|
| 51 |
+
upsert_vectorstore_metadata(onboarding_id, vs_path)
|
| 52 |
+
logger.info(
|
| 53 |
+
"Upserted vectorstore metadata for onboarding_id=%s", onboarding_id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
)
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
# Create new chat session
|
| 57 |
+
chat_id = str(uuid.uuid4())
|
| 58 |
+
ChatHistoryManager.create_session(chat_id)
|
| 59 |
+
logger.info(
|
| 60 |
+
"Created new chat session %s for onboarding_id=%s",
|
| 61 |
+
chat_id, onboarding_id
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
+
return SetupResponse(
|
| 65 |
+
success=True,
|
| 66 |
+
message="RAG setup completed.",
|
| 67 |
+
onboarding_id=onboarding_id,
|
| 68 |
+
chat_id=chat_id,
|
| 69 |
+
vectorstore_path=vs_path
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
@router.post("/chat/{onboarding_id}/{chat_id}", response_model=ChatResponse)
|
| 73 |
+
async def chat_with_user(onboarding_id: str, chat_id: str, prompt_type: str, body: ChatRequest):
|
| 74 |
"""
|
| 75 |
+
Chat endpoint that uses an existing chat session and vectorstore.
|
| 76 |
+
- Validates that the vectorstore exists for onboarding_id.
|
| 77 |
+
- Validates that the chat session exists.
|
|
|
|
| 78 |
"""
|
| 79 |
+
# 0. Validate vectorstore
|
| 80 |
+
vectorstore_path = get_vectorstore_path(onboarding_id)
|
| 81 |
+
if not os.path.isdir(vectorstore_path):
|
| 82 |
+
logger.error("Vectorstore not found for onboarding_id=%s", onboarding_id)
|
| 83 |
+
raise HTTPException(
|
| 84 |
+
status_code=400,
|
| 85 |
+
detail="Vectorstore not found for this onboarding_id. Please run /setup first."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
)
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
+
# 1. Ensure chat session exists
|
| 89 |
+
if not ChatHistoryManager.chat_exists(chat_id):
|
| 90 |
+
logger.error("Chat session %s not found for onboarding_id=%s", chat_id, onboarding_id)
|
| 91 |
+
raise HTTPException(
|
| 92 |
+
status_code=404,
|
| 93 |
+
detail=f"Chat session {chat_id} does not exist."
|
| 94 |
+
)
|
| 95 |
|
|
|
|
|
|
|
| 96 |
question = body.question.strip()
|
| 97 |
+
logger.info("Chat request onboarding_id=%s chat=%s question=%s", onboarding_id, chat_id, question)
|
| 98 |
|
| 99 |
try:
|
| 100 |
+
# Summarize long histories
|
|
|
|
|
|
|
|
|
|
| 101 |
ChatHistoryManager.summarize_if_needed(chat_id, threshold=10)
|
| 102 |
|
| 103 |
+
# Record the user message
|
| 104 |
ChatHistoryManager.add_message(chat_id, role="human", content=question)
|
| 105 |
|
| 106 |
+
# Build and invoke the RAG chain
|
| 107 |
+
chain = build_rag_chain(onboarding_id, chat_id, prompt_type)
|
| 108 |
history = ChatHistoryManager.get_messages(chat_id)
|
| 109 |
result = chain.invoke({"question": question, "chat_history": history})
|
| 110 |
answer = result.get("answer") or result.get("output_text")
|
| 111 |
if not answer:
|
| 112 |
raise Exception("No answer returned from chain")
|
| 113 |
|
| 114 |
+
# Record the AI response
|
| 115 |
ChatHistoryManager.add_message(chat_id, role="ai", content=answer)
|
| 116 |
|
| 117 |
return ChatResponse(
|
|
|
|
| 119 |
answer=answer,
|
| 120 |
error=None,
|
| 121 |
chat_id=chat_id,
|
| 122 |
+
onboarding_id=onboarding_id
|
| 123 |
)
|
| 124 |
|
| 125 |
except HTTPException:
|
| 126 |
raise
|
| 127 |
except Exception as e:
|
| 128 |
+
logger.error("Error chatting onboarding_id=%s chat=%s: %s", onboarding_id, chat_id, e, exc_info=True)
|
| 129 |
return ChatResponse(
|
| 130 |
success=False,
|
| 131 |
answer=None,
|
| 132 |
error=str(e),
|
| 133 |
chat_id=chat_id,
|
| 134 |
+
onboarding_id=onboarding_id
|
| 135 |
)
|
app/rag/schemas.py
CHANGED
|
@@ -1,33 +1,6 @@
|
|
| 1 |
from pydantic import BaseModel, Field
|
| 2 |
from typing import List, Optional
|
| 3 |
|
| 4 |
-
class IngestRequest(BaseModel):
|
| 5 |
-
"""
|
| 6 |
-
Request body for ingesting documents into a user's FAISS vector store.
|
| 7 |
-
"""
|
| 8 |
-
documents: List[str] = Field(
|
| 9 |
-
...,
|
| 10 |
-
description="A list of text documents (strings) to ingest into the vector store."
|
| 11 |
-
)
|
| 12 |
-
|
| 13 |
-
class IngestResponse(BaseModel):
|
| 14 |
-
"""
|
| 15 |
-
Response after ingesting documents for a user.
|
| 16 |
-
"""
|
| 17 |
-
success: bool
|
| 18 |
-
message: str
|
| 19 |
-
user_id: str
|
| 20 |
-
vectorstore_path: Optional[str] = None
|
| 21 |
-
|
| 22 |
-
class CreateChatResponse(BaseModel):
|
| 23 |
-
"""
|
| 24 |
-
Response after creating a new chat session for a user.
|
| 25 |
-
"""
|
| 26 |
-
success: bool
|
| 27 |
-
message: str
|
| 28 |
-
user_id: str
|
| 29 |
-
chat_id: Optional[str] = None
|
| 30 |
-
|
| 31 |
class ChatRequest(BaseModel):
|
| 32 |
"""
|
| 33 |
Body for sending a user message to an existing chat session.
|
|
@@ -42,4 +15,14 @@ class ChatResponse(BaseModel):
|
|
| 42 |
answer: Optional[str] = None
|
| 43 |
error: Optional[str] = None
|
| 44 |
chat_id: str
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pydantic import BaseModel, Field
|
| 2 |
from typing import List, Optional
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
class ChatRequest(BaseModel):
|
| 5 |
"""
|
| 6 |
Body for sending a user message to an existing chat session.
|
|
|
|
| 15 |
answer: Optional[str] = None
|
| 16 |
error: Optional[str] = None
|
| 17 |
chat_id: str
|
| 18 |
+
onboarding_id: str
|
| 19 |
+
|
| 20 |
+
class SetupRequest(BaseModel):
|
| 21 |
+
documents: List[str]
|
| 22 |
+
|
| 23 |
+
class SetupResponse(BaseModel):
|
| 24 |
+
success: bool
|
| 25 |
+
message: str
|
| 26 |
+
onboarding_id: str
|
| 27 |
+
chat_id: str
|
| 28 |
+
vectorstore_path: str
|
app/rag/utils.py
CHANGED
|
@@ -7,7 +7,7 @@ from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
|
|
| 7 |
from langchain.memory import ConversationBufferMemory # β IMPORT THIS
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
|
| 10 |
-
from app.config import settings
|
| 11 |
from .db import vectorstore_meta_coll, chat_collection_name
|
| 12 |
from .embeddings import embeddings, text_splitter, get_llm
|
| 13 |
from .logging_config import logger
|
|
@@ -23,7 +23,7 @@ def get_vectorstore_path(user_id: str) -> str:
|
|
| 23 |
"""
|
| 24 |
base_dir = settings.vectorstore_base_path
|
| 25 |
user_dir = os.path.join(base_dir, user_id)
|
| 26 |
-
os.makedirs(user_dir, exist_ok=True)
|
| 27 |
return user_dir
|
| 28 |
|
| 29 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 7 |
from langchain.memory import ConversationBufferMemory # β IMPORT THIS
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
|
| 10 |
+
from app.page_speed.config import settings
|
| 11 |
from .db import vectorstore_meta_coll, chat_collection_name
|
| 12 |
from .embeddings import embeddings, text_splitter, get_llm
|
| 13 |
from .logging_config import logger
|
|
|
|
| 23 |
"""
|
| 24 |
base_dir = settings.vectorstore_base_path
|
| 25 |
user_dir = os.path.join(base_dir, user_id)
|
| 26 |
+
# os.makedirs(user_dir, exist_ok=True)
|
| 27 |
return user_dir
|
| 28 |
|
| 29 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
app/seo/__init__.py
ADDED
|
File without changes
|
app/seo/models.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import Any, Dict
|
| 3 |
+
|
| 4 |
+
class SEORequest(BaseModel):
|
| 5 |
+
seo_data: Dict[str, Any]
|
app/seo/routes.py
CHANGED
|
@@ -1,38 +1,27 @@
|
|
| 1 |
from fastapi import APIRouter, HTTPException
|
| 2 |
-
from pydantic import BaseModel
|
| 3 |
-
from typing import Any, Dict
|
| 4 |
from .seo_service import SEOService
|
| 5 |
-
|
| 6 |
|
| 7 |
router = APIRouter(prefix="/seo", tags=["SEO"])
|
| 8 |
|
| 9 |
seo_service = SEOService()
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
seo_data: Dict[str, Any]
|
| 14 |
-
|
| 15 |
-
class SEOPriorityRequest(BaseModel):
|
| 16 |
-
report: str
|
| 17 |
-
|
| 18 |
-
@router.post("/generate-report")
|
| 19 |
-
def generate_seo_report(request: SEORequest):
|
| 20 |
"""
|
| 21 |
-
Generate SEO report
|
| 22 |
"""
|
| 23 |
try:
|
|
|
|
| 24 |
report = seo_service.generate_seo_report(request.seo_data)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
try:
|
| 35 |
-
priority_suggestions = seo_service.generate_seo_priority(request.report)
|
| 36 |
-
return {"success": True, "priority_suggestions": priority_suggestions}
|
| 37 |
except Exception as e:
|
| 38 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
| 1 |
from fastapi import APIRouter, HTTPException
|
|
|
|
|
|
|
| 2 |
from .seo_service import SEOService
|
| 3 |
+
from .models import SEORequest
|
| 4 |
|
| 5 |
router = APIRouter(prefix="/seo", tags=["SEO"])
|
| 6 |
|
| 7 |
seo_service = SEOService()
|
| 8 |
|
| 9 |
+
@router.post("/generate-full-report")
|
| 10 |
+
def generate_full_seo_analysis(request: SEORequest):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"""
|
| 12 |
+
Generate full SEO analysis: report + prioritized suggestions.
|
| 13 |
"""
|
| 14 |
try:
|
| 15 |
+
# Step 1: Generate SEO report (as a string)
|
| 16 |
report = seo_service.generate_seo_report(request.seo_data)
|
| 17 |
+
|
| 18 |
+
# Step 2: Generate prioritized SEO suggestions from the report
|
| 19 |
+
priority_suggestions = seo_service.generate_seo_priority(report)
|
| 20 |
+
|
| 21 |
+
return {
|
| 22 |
+
"success": True,
|
| 23 |
+
"report": report,
|
| 24 |
+
"priority_suggestions": priority_suggestions
|
| 25 |
+
}
|
|
|
|
|
|
|
|
|
|
| 26 |
except Exception as e:
|
| 27 |
raise HTTPException(status_code=500, detail=str(e))
|
app/seo/seo_service.py
CHANGED
|
@@ -6,15 +6,11 @@ import requests
|
|
| 6 |
import logging
|
| 7 |
import google.generativeai as genai
|
| 8 |
from typing import Dict, Any
|
| 9 |
-
from app.config import settings
|
| 10 |
|
| 11 |
# Create a module-level logger
|
| 12 |
glogger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
class SEOService:
|
| 19 |
"""
|
| 20 |
Service class for generating SEO reports via Gemini.
|
|
@@ -67,127 +63,105 @@ class SEOService:
|
|
| 67 |
Build the advanced prompt for SEO analysis based on the updated specialized template.
|
| 68 |
"""
|
| 69 |
return f"""
|
| 70 |
-
You are an **Expert SEO Consultant** with
|
| 71 |
-
|
| 72 |
-
The following JSON `{{SEO_DATA}}` contains exactly these keys (all required):
|
| 73 |
-
|
| 74 |
-
{json.dumps(seo_data, indent=2)}
|
| 75 |
-
|
| 76 |
-
Your task is to output **exactly** the following JSON reportβno additional text, no extra keys, no commentary:
|
| 77 |
|
| 78 |
-
|
| 79 |
-
{{
|
| 80 |
-
"overall_score": integer,
|
| 81 |
-
"grade": "A"|"B"|"C"|"D"|"F",
|
| 82 |
-
"top_strengths": [string],
|
| 83 |
-
"top_issues": [string],
|
| 84 |
-
"metrics": [
|
| 85 |
-
{{
|
| 86 |
-
"name": string,
|
| 87 |
-
"value": string|number|boolean|array,
|
| 88 |
-
"benchmark": string,
|
| 89 |
-
"score": integer,
|
| 90 |
-
"status": "good"|"needs_improvement"|"critical",
|
| 91 |
-
"why_it_matters": string,
|
| 92 |
-
"recommendation": string
|
| 93 |
-
}}
|
| 94 |
-
],
|
| 95 |
-
"action_plan": [
|
| 96 |
-
{{
|
| 97 |
-
"metric": string,
|
| 98 |
-
"fix": string,
|
| 99 |
-
"effort_level": "low"|"medium"|"high"
|
| 100 |
-
}}
|
| 101 |
-
],
|
| 102 |
-
"monitoring": {{
|
| 103 |
-
"frequency": string,
|
| 104 |
-
"methods": [string]
|
| 105 |
-
}},
|
| 106 |
-
"technical_seo": "data_unavailable" | {{
|
| 107 |
-
"core_web_vitals": {{
|
| 108 |
-
"LCP": string,
|
| 109 |
-
"FID": string,
|
| 110 |
-
"CLS": string
|
| 111 |
-
}},
|
| 112 |
-
"page_speed_score": integer,
|
| 113 |
-
"lazy_loading": boolean,
|
| 114 |
-
"security_headers": [string]
|
| 115 |
-
}},
|
| 116 |
-
"schema_markup": "data_unavailable" | {{
|
| 117 |
-
"structured_data_types": [string],
|
| 118 |
-
"valid": boolean
|
| 119 |
-
}},
|
| 120 |
-
"backlink_profile": "data_unavailable" | {{
|
| 121 |
-
"referring_domains": integer,
|
| 122 |
-
"toxic_links": integer,
|
| 123 |
-
"recommendations": string
|
| 124 |
-
}},
|
| 125 |
-
"trend_comparison": "data_unavailable" | {{
|
| 126 |
-
"previous_score": integer,
|
| 127 |
-
"change": "increase"|"decrease"|"no_change",
|
| 128 |
-
"comment": string
|
| 129 |
-
}}
|
| 130 |
-
}}
|
| 131 |
|
| 132 |
-
|
| 133 |
|
| 134 |
-
|
| 135 |
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
-
|
| 139 |
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
-
|
| 143 |
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
-
|
| 147 |
|
| 148 |
-
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
|
| 151 |
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
-
|
| 155 |
|
| 156 |
-
|
| 157 |
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
-
|
| 161 |
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
-
|
| 165 |
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
-
|
| 169 |
|
| 170 |
-
|
| 171 |
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
-
|
|
|
|
| 177 |
|
| 178 |
-
"monthly" if all metrics are "good".
|
| 179 |
-
|
| 180 |
-
Grading scale:
|
| 181 |
-
|
| 182 |
-
90β100: A
|
| 183 |
-
|
| 184 |
-
80β89: B
|
| 185 |
-
|
| 186 |
-
70β79: C
|
| 187 |
-
|
| 188 |
-
60β69: D
|
| 189 |
-
|
| 190 |
-
<60: F
|
| 191 |
"""
|
| 192 |
|
| 193 |
def generate_seo_priority(self, report: str) -> Dict[str, Any]:
|
|
@@ -233,19 +207,19 @@ Classification Rules:
|
|
| 233 |
2. **Benchmark Comparison:** Include both the **current value** and the **ideal benchmark**
|
| 234 |
(e.g. `"Current: 15 keywords, Ideal: 1β3% density"`).
|
| 235 |
3. **Impact Estimate:** Quantify expected SEO impact (e.g. `"+12% CTR"` or `"+0.5 page rank score"`).
|
| 236 |
-
4. **
|
| 237 |
-
5. **Code Snippet:** Provide a readyβtoβcopy example if applicable
|
| 238 |
(e.g. `<meta name="description" content="...">`).
|
| 239 |
-
|
| 240 |
`[On-Page]`, `[Technical]`, `[Off-Page]`, `[Local]`, `[Schema]`.
|
| 241 |
-
|
| 242 |
-
8. **Platform Tip:** If applicable, include CMS or framework advice
|
| 243 |
(e.g. `"WordPress: use Yoast SEO"`, `"Next.js: use next/head"`).
|
| 244 |
-
|
| 245 |
- **High:** Any metric with score `"critical"` or <β―60, or impact β₯β―10%.
|
| 246 |
- **Medium:** Score 60β79 or impact 5β9%.
|
| 247 |
- **Low:** Score 80β100 or impact <β―5%.
|
| 248 |
- **Unknown:** No score or impact data available.
|
|
|
|
|
|
|
| 249 |
|
| 250 |
Important:
|
| 251 |
- Respond with *only* a valid JSON object.
|
|
|
|
| 6 |
import logging
|
| 7 |
import google.generativeai as genai
|
| 8 |
from typing import Dict, Any
|
| 9 |
+
from app.page_speed.config import settings
|
| 10 |
|
| 11 |
# Create a module-level logger
|
| 12 |
glogger = logging.getLogger(__name__)
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
class SEOService:
|
| 15 |
"""
|
| 16 |
Service class for generating SEO reports via Gemini.
|
|
|
|
| 63 |
Build the advanced prompt for SEO analysis based on the updated specialized template.
|
| 64 |
"""
|
| 65 |
return f"""
|
| 66 |
+
You are an **Expert SEO Consultant** with advanced knowledge of on-page, technical, and off-page SEO.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
+
Your task is to analyze this data and return a detailed SEO audit report as a **multi-line string** (not as JSON). Keep it structured, clear, and easy to read β for example, using sections, bullet points, and indentation.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
+
Include these sections in your output:
|
| 71 |
|
| 72 |
+
---
|
| 73 |
|
| 74 |
+
**Overall Summary**
|
| 75 |
+
- Overall SEO Score: (0β100)
|
| 76 |
+
- Grade: A, B, C, D, or F
|
| 77 |
+
- Top Strengths: List the top 3β5 strong areas
|
| 78 |
+
- Top Issues: List the top 3β5 weak/problematic areas
|
| 79 |
|
| 80 |
+
---
|
| 81 |
|
| 82 |
+
**Metric Breakdown**
|
| 83 |
+
For each key metric in the data:
|
| 84 |
+
- Metric Name
|
| 85 |
+
- Value: ...
|
| 86 |
+
- Benchmark: ...
|
| 87 |
+
- Score: ...
|
| 88 |
+
- Status: good / needs improvement / critical
|
| 89 |
+
- Why It Matters: Explain simply
|
| 90 |
+
- Recommendation: What to fix or improve
|
| 91 |
|
| 92 |
+
---
|
| 93 |
|
| 94 |
+
**Action Plan**
|
| 95 |
+
List 5 weakest metrics and how to fix them:
|
| 96 |
+
- Metric: ...
|
| 97 |
+
- Fix: ...
|
| 98 |
+
- Effort Level: low / medium / high
|
| 99 |
|
| 100 |
+
---
|
| 101 |
|
| 102 |
+
**Monitoring Strategy**
|
| 103 |
+
- Frequency: weekly or monthly (based on severity of issues)
|
| 104 |
+
- Methods: Tools or techniques to track progress
|
| 105 |
|
| 106 |
+
---
|
| 107 |
|
| 108 |
+
**Technical SEO**
|
| 109 |
+
If data is available, include:
|
| 110 |
+
- Core Web Vitals (LCP, FID, CLS)
|
| 111 |
+
- Page Speed Score
|
| 112 |
+
- Lazy Loading Enabled
|
| 113 |
+
- Security Headers Present
|
| 114 |
|
| 115 |
+
If not available, just write βTechnical SEO data not available.β
|
| 116 |
|
| 117 |
+
---
|
| 118 |
|
| 119 |
+
**Schema Markup**
|
| 120 |
+
If available:
|
| 121 |
+
- Types Detected
|
| 122 |
+
- Is Valid: Yes/No
|
| 123 |
+
Else: βSchema markup data not available.β
|
| 124 |
|
| 125 |
+
---
|
| 126 |
|
| 127 |
+
**Backlink Profile**
|
| 128 |
+
If available:
|
| 129 |
+
- Referring Domains
|
| 130 |
+
- Toxic Links
|
| 131 |
+
- Recommendations to improve off-page SEO
|
| 132 |
|
| 133 |
+
---
|
| 134 |
|
| 135 |
+
**Trend Comparison**
|
| 136 |
+
If available:
|
| 137 |
+
- Previous Score
|
| 138 |
+
- Score Change (increase, decrease, or no change)
|
| 139 |
+
- Comment
|
| 140 |
|
| 141 |
+
---
|
| 142 |
|
| 143 |
+
### βοΈ Scoring Rules Summary (for reference):
|
| 144 |
|
| 145 |
+
- SEO Score: β€50 = critical, 51β70 = needs improvement, >70 = good
|
| 146 |
+
- Meta Title: 50β60 chars = good, else needs improvement
|
| 147 |
+
- H1 Tags: exactly 1 = good, 0 or >1 = needs improvement/critical
|
| 148 |
+
- Heading Errors: any = critical
|
| 149 |
+
- Image Alt Tags: β₯90% = good, 50β89% = needs improvement, <50% = critical
|
| 150 |
+
- sitemapXmlCheck / robotsTxtCheck: missing = critical
|
| 151 |
+
- indexabilityCheck: false = critical
|
| 152 |
+
- internalLinksCount: <5 = needs improvement
|
| 153 |
+
- externalLinksCount: <2 = needs improvement
|
| 154 |
|
| 155 |
+
Use these rules to calculate metric status and overall grade:
|
| 156 |
+
- 90β100 β A
|
| 157 |
+
- 80β89 β B
|
| 158 |
+
- 70β79 β C
|
| 159 |
+
- 60β69 β D
|
| 160 |
+
- <60 β F
|
| 161 |
|
| 162 |
+
SEO data provided in JSON format:
|
| 163 |
+
{seo_data}
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
"""
|
| 166 |
|
| 167 |
def generate_seo_priority(self, report: str) -> Dict[str, Any]:
|
|
|
|
| 207 |
2. **Benchmark Comparison:** Include both the **current value** and the **ideal benchmark**
|
| 208 |
(e.g. `"Current: 15 keywords, Ideal: 1β3% density"`).
|
| 209 |
3. **Impact Estimate:** Quantify expected SEO impact (e.g. `"+12% CTR"` or `"+0.5 page rank score"`).
|
| 210 |
+
4. **Code Snippet:** Provide a readyβtoβcopy example if applicable
|
|
|
|
| 211 |
(e.g. `<meta name="description" content="...">`).
|
| 212 |
+
5. **Category Tag:** Prefix with SEO domainβ
|
| 213 |
`[On-Page]`, `[Technical]`, `[Off-Page]`, `[Local]`, `[Schema]`.
|
| 214 |
+
6. **Platform Tip:** If applicable, include CMS or framework advice
|
|
|
|
| 215 |
(e.g. `"WordPress: use Yoast SEO"`, `"Next.js: use next/head"`).
|
| 216 |
+
7. **Priority Classification:**
|
| 217 |
- **High:** Any metric with score `"critical"` or <β―60, or impact β₯β―10%.
|
| 218 |
- **Medium:** Score 60β79 or impact 5β9%.
|
| 219 |
- **Low:** Score 80β100 or impact <β―5%.
|
| 220 |
- **Unknown:** No score or impact data available.
|
| 221 |
+
8. Explain in easy english, avoiding technical jargon and explaination for technical terms.
|
| 222 |
+
|
| 223 |
|
| 224 |
Important:
|
| 225 |
- Respond with *only* a valid JSON object.
|