Hammad712 commited on
Commit
c1b303d
Β·
1 Parent(s): 8f3347a

first commit

Browse files
app/config.py DELETED
@@ -1,57 +0,0 @@
1
- from pydantic_settings import BaseSettings, SettingsConfigDict
2
-
3
- class Settings(BaseSettings):
4
- """Application settings loaded from environment variables."""
5
-
6
- # ───────────────────────────────────────────────────────────────────────────
7
- # Google API Keys
8
- # ───────────────────────────────────────────────────────────────────────────
9
- pagespeed_api_key: str
10
- gemini_api_key: str
11
-
12
- # ───────────────────────────────────────────────────────────────────────────
13
- # Chat & RAG Configuration
14
- # ───────────────────────────────────────────────────────────────────────────
15
- groq_api_key: str
16
- vectorstore_base_path: str = "./vectorstores"
17
-
18
- # ───────────────────────────────────────────────────────────────────────────
19
- # Hugging Face Hub
20
- # ───────────────────────────────────────────────────────────────────────────
21
- huggingfacehub_api_token: str
22
-
23
- # ───────────────────────────────────────────────────────────────────────────
24
- # MongoDB Configuration (Local)
25
- # ───────────────────────────────────────────────────────────────────────────
26
- mongo_uri: str = "mongodb://localhost:27017"
27
- mongo_chat_db: str = "Education_chatbot"
28
- mongo_chat_collection: str = "chat_histories"
29
-
30
- # ───────────────────────────────────────────────────────────────────────────
31
- # FastAPI Server Configuration
32
- # ───────────────────────────────────────────────────────────────────────────
33
- host: str = "0.0.0.0"
34
- port: int = 8000
35
- debug: bool = False
36
-
37
- # ───────────────────────────────────────────────────────────────────────────
38
- # App Metadata (unchanged)
39
- # ───────────────────────────────────────────────────────────────────────────
40
- app_name: str = "PageSpeed Insights Report Generator"
41
- app_version: str = "1.0.0"
42
- app_description: str = (
43
- "Professional API for generating PageSpeed Insights reports "
44
- "using Google's APIs and Gemini AI"
45
- )
46
-
47
- # ───────────────────────────────────────────────────────────────────────────
48
- # Tell Pydantic to load from .env and ignore extras
49
- # ───────────────────────────────────────────────────────────────────────────
50
- model_config = SettingsConfigDict(
51
- env_file=".env",
52
- env_file_encoding="utf-8",
53
- extra="ignore",
54
- )
55
-
56
- # Single shared Settings instance
57
- settings = Settings()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/main.py CHANGED
@@ -5,24 +5,16 @@ import time
5
  import logging
6
  import json
7
  from datetime import datetime
8
- from fastapi import FastAPI, HTTPException, Depends
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from fastapi.responses import JSONResponse
11
  from contextlib import asynccontextmanager
12
 
13
- from app.config import settings
14
- from app.models import (
15
- PageSpeedRequest,
16
- PageSpeedDataResponse,
17
- ReportRequest,
18
- ReportResponse,
19
- HealthResponse,
20
- PriorityRequest,
21
- PriorityResponse
22
- )
23
- from app.services import PageSpeedService
24
  from app.rag.routes import router as rag_router
25
  from app.seo import routes as seo_routes
 
26
 
27
 
28
  # ------------------------
@@ -67,6 +59,8 @@ app.include_router(rag_router)
67
 
68
  app.include_router(seo_routes.router)
69
 
 
 
70
 
71
  # Add CORS middleware
72
  app.add_middleware(
@@ -77,12 +71,6 @@ app.add_middleware(
77
  allow_headers=["*"],
78
  )
79
 
80
- # Dependency to get PageSpeed service
81
- def get_pagespeed_service() -> PageSpeedService:
82
- """Dependency to get a new PageSpeedService instance."""
83
- return PageSpeedService()
84
-
85
-
86
  @app.get("/", response_model=dict)
87
  async def root():
88
  """Root endpoint with API information."""
@@ -113,124 +101,6 @@ async def health_check():
113
  )
114
 
115
 
116
- @app.post("/pagespeed", response_model=PageSpeedDataResponse)
117
- async def fetch_pagespeed(
118
- request: PageSpeedRequest,
119
- service: PageSpeedService = Depends(get_pagespeed_service)
120
- ):
121
- """
122
- Fetch raw PageSpeed Insights data for a given URL.
123
-
124
- Request body:
125
- {
126
- "url": "https://www.example.com"
127
- }
128
-
129
- Returns:
130
- {
131
- "success": true,
132
- "url": "https://www.example.com",
133
- "pagespeed_data": { ... },
134
- "error": null
135
- }
136
- """
137
- url_str = str(request.url)
138
- logger.info("Received POST /pagespeed for URL: %s", url_str)
139
-
140
- try:
141
- pagespeed_data = service.get_pagespeed_data(url_str)
142
- logger.info("Returning PageSpeed data for %s", url_str)
143
- return PageSpeedDataResponse(
144
- success=True,
145
- url=url_str,
146
- pagespeed_data=pagespeed_data,
147
- error=None
148
- )
149
- except Exception as e:
150
- logger.error("Error in /pagespeed endpoint for URL %s: %s", url_str, e, exc_info=True)
151
- return PageSpeedDataResponse(
152
- success=False,
153
- url=url_str,
154
- pagespeed_data=None,
155
- error=str(e)
156
- )
157
-
158
-
159
- @app.post("/generate-report", response_model=ReportResponse)
160
- async def generate_report(
161
- body: ReportRequest,
162
- service: PageSpeedService = Depends(get_pagespeed_service)
163
- ):
164
- """
165
- Generate a Gemini-based optimization report from previously-fetched PageSpeed JSON.
166
-
167
- Request body:
168
- {
169
- "pagespeed_data": { …full PageSpeed JSON… }
170
- }
171
-
172
- Returns:
173
- {
174
- "success": true,
175
- "report": "Gemini-generated analysis…",
176
- "error": null
177
- }
178
- """
179
- logger.info("Received POST /generate-report")
180
-
181
- try:
182
- pagespeed_data = body.pagespeed_data
183
- logger.debug("PageSpeed JSON payload size: %d bytes", len(str(pagespeed_data)))
184
-
185
- report_text = service.generate_report_with_gemini(pagespeed_data)
186
- logger.info("Returning Gemini report.")
187
- return ReportResponse(
188
- success=True,
189
- report=report_text,
190
- error=None
191
- )
192
- except Exception as e:
193
- logger.error("Error in /generate-report endpoint: %s", e, exc_info=True)
194
- return ReportResponse(
195
- success=False,
196
- report=None,
197
- error=str(e)
198
- )
199
-
200
-
201
- @app.post("/generate-priorities", response_model=PriorityResponse)
202
- async def generate_priorities(
203
- request: PriorityRequest,
204
- service: PageSpeedService = Depends(get_pagespeed_service)
205
- ):
206
- """
207
- Generate a prioritized list of performance improvements from a Gemini report.
208
-
209
- Request body:
210
- {
211
- "report": "Full Gemini-generated performance report..."
212
- }
213
-
214
- Returns:
215
- {
216
- "success": true,
217
- "priorities": {
218
- "High": ["Optimize TBT by reducing JS execution", ...],
219
- "Medium": [...],
220
- "Low": [...]
221
- },
222
- "error": null
223
- }
224
- """
225
- logger.info("Received POST /generate-priorities")
226
- try:
227
- priorities = service.generate_priority(request.report)
228
- return PriorityResponse(success=True, priorities=priorities)
229
- except Exception as e:
230
- logger.error("Error in /generate-priorities: %s", e, exc_info=True)
231
- return PriorityResponse(success=False, priorities=None, error=str(e))
232
-
233
-
234
  @app.exception_handler(404)
235
  async def not_found_handler(request, exc):
236
  """Custom 404 handler."""
@@ -244,7 +114,6 @@ async def not_found_handler(request, exc):
244
  }
245
  )
246
 
247
-
248
  @app.exception_handler(500)
249
  async def internal_error_handler(request, exc):
250
  """Custom 500 handler."""
 
5
  import logging
6
  import json
7
  from datetime import datetime
8
+ from fastapi import FastAPI
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from fastapi.responses import JSONResponse
11
  from contextlib import asynccontextmanager
12
 
13
+ from app.page_speed.config import settings
14
+ from app.page_speed.models import HealthResponse
 
 
 
 
 
 
 
 
 
15
  from app.rag.routes import router as rag_router
16
  from app.seo import routes as seo_routes
17
+ from app.page_speed import routes as page_speed_routes
18
 
19
 
20
  # ------------------------
 
59
 
60
  app.include_router(seo_routes.router)
61
 
62
+ # Mount PageSpeed router
63
+ app.include_router(page_speed_routes.router)
64
 
65
  # Add CORS middleware
66
  app.add_middleware(
 
71
  allow_headers=["*"],
72
  )
73
 
 
 
 
 
 
 
74
  @app.get("/", response_model=dict)
75
  async def root():
76
  """Root endpoint with API information."""
 
101
  )
102
 
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  @app.exception_handler(404)
105
  async def not_found_handler(request, exc):
106
  """Custom 404 handler."""
 
114
  }
115
  )
116
 
 
117
  @app.exception_handler(500)
118
  async def internal_error_handler(request, exc):
119
  """Custom 500 handler."""
app/page_speed/__init__.py ADDED
File without changes
app/page_speed/config.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic_settings import BaseSettings, SettingsConfigDict
from urllib.parse import quote_plus


class Settings(BaseSettings):
    """Application settings loaded from environment variables / .env.

    Fields without defaults are required and must be present in the
    environment or the .env file.
    """

    # Google API Keys
    pagespeed_api_key: str
    gemini_api_key: str

    # Chat & RAG Configuration
    groq_api_key: str
    vectorstore_base_path: str = "./vectorstores"

    # Hugging Face
    huggingfacehub_api_token: str

    # MongoDB Config
    mongo_password: str
    # Username and cluster host were previously hard-coded inside the URI;
    # they are now settings with defaults that preserve the original target,
    # so they can be overridden per environment without a code change.
    mongo_user: str = "Hammad"
    mongo_cluster_host: str = "cluster0.oi9z5ig.mongodb.net"
    mongo_chat_db: str = "MAAS"
    mongo_chat_collection: str = "chat_histories"

    # FastAPI Server Config
    host: str = "0.0.0.0"
    port: int = 8000
    debug: bool = False

    # MongoDB Atlas URI (Dynamically Constructed)
    @property
    def mongo_uri(self) -> str:
        """Return the MongoDB Atlas SRV connection URI.

        The password is URL-encoded via quote_plus so characters such as
        '@' or ':' cannot break the URI.
        """
        encoded_pwd = quote_plus(self.mongo_password)
        return (
            f"mongodb+srv://{self.mongo_user}:{encoded_pwd}"
            f"@{self.mongo_cluster_host}/{self.mongo_chat_db}"
            "?retryWrites=true&w=majority"
        )

    # Load from .env and ignore unknown keys
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore"
    )

# Global settings instance
settings = Settings()
app/{models.py β†’ page_speed/models.py} RENAMED
@@ -109,4 +109,14 @@ class PriorityRequest(BaseModel):
109
  class PriorityResponse(BaseModel):
110
  success: bool
111
  priorities: Optional[Dict[str, List[str]]] = None
112
- error: Optional[str] = None
 
 
 
 
 
 
 
 
 
 
 
109
  class PriorityResponse(BaseModel):
110
  success: bool
111
  priorities: Optional[Dict[str, List[str]]] = None
112
+ error: Optional[str] = None
113
+
114
class AnalyzeRequest(BaseModel):
    # Target page to analyze; validated by pydantic as a proper URL.
    url: HttpUrl

class AnalyzeResponse(BaseModel):
    # True when the full pipeline (fetch data -> report -> priorities) succeeded.
    success: bool
    # Echo of the analyzed URL.
    url: HttpUrl
    # Human-readable report text; None when the pipeline failed.
    report: str | None
    # Priority buckets (presumably High/Medium/Low lists — confirm against
    # PageSpeedService.generate_priority); None on failure.
    priorities: dict | None
    # Error message when success is False, otherwise None.
    error: str | None
app/page_speed/routes.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
PageSpeed Insights API routes for analyzing URLs and generating reports.
"""
# NOTE: this string was previously a bare expression placed after the
# router definition, so it never actually became the module docstring;
# it is now at the top of the module where Python assigns it to __doc__.

import logging

from fastapi import APIRouter, Depends

from app.page_speed.models import (
    AnalyzeRequest,
    AnalyzeResponse
)
from app.page_speed.services import PageSpeedService

router = APIRouter(prefix="/pagespeed", tags=["PageSpeed"])

logger = logging.getLogger("app.page_speed.routes")
logger.setLevel(logging.INFO)


def get_pagespeed_service() -> PageSpeedService:
    """Dependency to get a new PageSpeedService instance."""
    return PageSpeedService()


@router.post("/analyze-url", response_model=AnalyzeResponse)
async def analyze_url(
    request: AnalyzeRequest,
    service: PageSpeedService = Depends(get_pagespeed_service)
):
    """
    One-stop endpoint to fetch PageSpeed data, generate report, and derive priorities.

    - Takes a single 'url' field in the body.
    - Returns pagespeed_data, human-friendly report, and priority lists.

    On any failure the endpoint still returns HTTP 200 with success=False
    and the error message in `error` (matching the other endpoints' style).
    """
    url_str = str(request.url)
    logger.info("Received POST /analyze-url for URL: %s", url_str)

    try:
        # 1. Fetch raw PageSpeed Insights data
        pagespeed_data = service.get_pagespeed_data(url_str)
        logger.debug("Fetched PageSpeed data (bytes=%d)", len(str(pagespeed_data)))

        # 2. Generate text report via Gemini
        report_text = service.generate_report_with_gemini(pagespeed_data)
        logger.debug("Generated report text (chars=%d)", len(report_text))

        # 3. Produce prioritized improvements
        priorities = service.generate_priority(report_text)
        logger.info("Analysis complete for %s", url_str)

        return AnalyzeResponse(
            success=True,
            url=url_str,
            report=report_text,
            priorities=priorities,
            error=None
        )
    except Exception as e:
        logger.error("Error in /analyze-url: %s", e, exc_info=True)
        return AnalyzeResponse(
            success=False,
            url=url_str,
            report=None,
            priorities=None,
            error=str(e)
        )
app/{services.py β†’ page_speed/services.py} RENAMED
@@ -6,7 +6,7 @@ import requests
6
  import logging
7
  import google.generativeai as genai
8
  from typing import Dict, Any
9
- from app.config import settings
10
 
11
  # Create a module-level logger
12
  logger = logging.getLogger(__name__)
@@ -116,106 +116,78 @@ class PageSpeedService:
116
 
117
  def _create_analysis_prompt(self, pagespeed_data: Dict[Any, Any]) -> str:
118
  """
119
- Create the specialized prompt for Gemini analysis.
120
 
121
  Args:
122
  pagespeed_data (Dict[Any, Any]): PageSpeed Insights data
123
 
124
  Returns:
125
- str: Formatted prompt for Gemini
126
  """
127
  logger.debug("Building Gemini analysis prompt from PageSpeed data.")
128
  return f"""
 
129
  You are an **Expert Web Performance Optimization Consultant**. The following JSON `{pagespeed_data}` contains exactly these keys (all required):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
- ```
132
- {{
133
- "url": string, // analyzed page URL
134
- "origin": string, // origin domain
135
- "loading_experience": {{ // Chrome UX data for URL
136
- "overall_category": "FAST"|"AVERAGE"|"SLOW",
137
- "metrics": {{
138
- "CLS": {{ "percentile": number, "category": string }},
139
- "TTFB": {{ "percentile": number, "category": string }},
140
- "FCP": {{ "percentile": number, "category": string }},
141
- "INP": {{ "percentile": number, "category": string }}
142
- }}
143
- }},
144
- "origin_loading_experience": {{ // Chrome UX data for origin
145
- "overall_category": "FAST"|"AVERAGE"|"SLOW"
146
- }},
147
- "lighthouse_audits": [ // only audits with score <1 or notApplicable
148
- {{
149
- "id": string, // audit identifier
150
- "numeric_value": number, // ms or unit value
151
- "score": number|null, // 0–1 or null if N/A
152
- "description": string, // audit title/description
153
- "details": {{ // optional details for resource URLs
154
- "items": [ {{ "url": string }} ]
155
- }},
156
- "metric_savings_ms"?: number // if available
157
- }}
158
- ]
159
- }}
160
- ```
161
-
162
- Your job: output **exactly** the following JSON reportβ€”no extra keys, no prose outside these structures:
163
-
164
- ```json
165
- {{
166
- "overall_score": integer,
167
- "grade": "A"|"B"|"C"|"D"|"F",
168
- "summary": {{
169
- "CLS": {{ "value": number, "category": string }},
170
- "TTFB": {{ "value": number, "category": string }},
171
- "FCP": {{ "value": number, "category": string }},
172
- "INP": {{ "value": number, "category": string }},
173
- "LCP": {{ "value": number, "score": number }},
174
- "TBT": {{ "value": number, "score": number }}
175
- }},
176
- "top_issues": [string],
177
- "top_opportunities": [string],
178
- "audits": [
179
- {{
180
- "id": string,
181
- "value": number,
182
- "score": number|null,
183
- "resource_url"?: string, // first offending URL from details.items
184
- "status": "critical"|"needs_improvement"|"good",
185
- "recommendation": string,
186
- "expected_gain_s": number
187
- }}
188
- ],
189
- "action_plan": [
190
- {{
191
- "id": string,
192
- "fix": string,
193
- "platform_tip"?: string, // e.g. Next.js `next/image` or WordPress-specific advice
194
- "effort": "low"|"medium"|"high"
195
- }}
196
- ],
197
- "monitoring": {{
198
- "frequency": string,
199
- "methods": [string],
200
- "ci_snippet"?: string // optional GitHub Action or Lighthouse CI config
201
- }}
202
- }}```
203
- **Requirements:**
204
- - **Strict Mapping:** Every field derives from `{{PSI_DATA}}` (use JSON paths like `lighthouseResult.audits[...].numeric_value`).
205
- - **No Extra Text:** Only the JSON above.
206
- - **Tie to JSON Paths:** Include resource URLs via `details.items[0].url`.
207
- - **Exact Code Snippets:** Provide `<link rel="preload"...>` or `<script defer>` snippets.
208
- - **Quantify Impact:** Use `metric_savings_ms` for each audit to calculate `expected_gain_s`.
209
- - **Threshold Targets:** State target values, e.g. "Reduce LCP to ≀1200β€―ms".
210
- - **Platform‑Specific Tips:** If known, include stack advice, e.g. Next.js `next/image` or WordPress plugins.
211
- - **Monitoring CI:** Optionally include a GitHub Action snippet:
212
- ```yaml
213
- - uses: treosh/lighthouse-ci-action@v5
214
- with:
215
- configPath: .lighthouserc.json
216
- ```
217
- - **Deterministic Scoring & Priority:** Same as before.
218
- """
219
 
220
 
221
  def analyze_url(self, url: str) -> Dict[str, Any]:
@@ -295,16 +267,15 @@ Classification Rules:
295
  2. **Measurable Target:** Include the numeric goal (e.g., "Reduce LCP to ≀1200β€―ms").
296
  3. **Resource Context:** Embed the resource URL or file name when relevant.
297
  4. **Expected Savings:** Append expected savings in seconds (from `metric_savings_ms`).
298
- 5. **Effort Estimate:** Add an effort estimate (e.g., "Effort: Medium (β‰ˆ2β€―hrs)").
299
- 6. **Code Snippet:** Provide a ready‑to‑copy snippet if applicable (e.g., `<img loading="lazy" src=...>`).
300
- 7. **Category Tag:** Prefix with optimization domain `[Image]`, `[CSS]`, `[JS]`, `[Server]`.
301
- 8. **Impact Score:** Append a simple impact rating (e.g., "Impact: β­β­β­β˜†β˜†" or "% of total savings").
302
- 9. **Platform Tip:** If known, include stack‑specific advice (e.g., Next.js `next/image`).
303
- 10. **Priority Classification:**
304
  - High: Savings β‰₯ 1.5 seconds or score < 0.25
305
  - Medium: Savings between 0.5 and 1.49 seconds or score 0.25 to 0.50
306
  - Low: Savings < 0.5 seconds or score between 0.51 and 1.0
307
  - Unknown: No savings or score data available
 
308
 
309
  Important:
310
  - Respond with *only* a valid JSON object.
 
6
  import logging
7
  import google.generativeai as genai
8
  from typing import Dict, Any
9
+ from app.page_speed.config import settings
10
 
11
  # Create a module-level logger
12
  logger = logging.getLogger(__name__)
 
116
 
117
  def _create_analysis_prompt(self, pagespeed_data: Dict[Any, Any]) -> str:
118
  """
119
+ Create the specialized prompt for Gemini analysis in a human-readable format.
120
 
121
  Args:
122
  pagespeed_data (Dict[Any, Any]): PageSpeed Insights data
123
 
124
  Returns:
125
+ str: Human-readable, user-friendly report prompt
126
  """
127
  logger.debug("Building Gemini analysis prompt from PageSpeed data.")
128
  return f"""
129
+ <<<<<<< HEAD:app/services.py
130
  You are an **Expert Web Performance Optimization Consultant**. The following JSON `{pagespeed_data}` contains exactly these keys (all required):
131
+ =======
132
+ You are an **Expert Web Performance Optimization Consultant**. The following JSON `{{pagespeed_data}}` includes detailed website performance metrics from Google PageSpeed Insights.
133
+ >>>>>>> 574c6ac (Update endpoints):app/page_speed/services.py
134
+
135
+ Your task is to analyze this data and generate a human-friendly performance **report in plain English**. The report will be read by a **non-technical business owner**, so keep it understandable while explaining technical concepts briefly when necessary.
136
+
137
+ ### Format of Your Response:
138
+ Respond with a **natural language summary (not JSON)**. It should read like a report, not like code or technical output.
139
+
140
+ ---
141
+
142
+ ### Your report must include the following sections:
143
+
144
+ 1. **Overall Performance Summary**
145
+ - Explain how fast the website feels to users.
146
+ - Mention the overall category (FAST, AVERAGE, SLOW) and what that means.
147
+ - If origin data differs from page data, point it out.
148
+
149
+ 2. **Key Metrics Breakdown**
150
+ - For each metric (`CLS`, `TTFB`, `FCP`, `INP`, `LCP`, `TBT`):
151
+ - Provide the value and performance category (e.g., "good", "needs improvement").
152
+ - Briefly explain what the metric means and how it impacts the user experience.
153
+ - Use simple analogies if possible. (Example: β€œCLS measures layout shift – like if buttons jump around while loading.”)
154
+
155
+ 3. **Top Issues**
156
+ - List and explain the top 3–5 performance problems in plain language.
157
+ - Avoid jargon. Example: β€œToo many large images are slowing down the page.”
158
+
159
+ 4. **Improvement Opportunities**
160
+ - Suggest high-impact actions to improve speed (e.g., compress images, lazy load below-the-fold content).
161
+ - Prioritize based on effort (low/medium/high) and expected time savings.
162
+ - Mention technical fixes where helpful, but **always** explain what they do and **why they help**.
163
+
164
+ 5. **Detailed Audit Notes**
165
+ - Mention any specific URLs or files causing problems (e.g., slow scripts, unoptimized images).
166
+ - For each, explain the issue and estimated time it adds to loading.
167
+ - Be clear and concise.
168
+
169
+ 6. **Recommended Action Plan**
170
+ - Provide a to-do list of concrete fixes with estimated effort levels.
171
+ - If possible, include tips tailored to platforms (e.g., for WordPress or Next.js).
172
+
173
+ 7. **Ongoing Monitoring Advice**
174
+ - Recommend how often they should check performance.
175
+ ---
176
+
177
+ ### Important:
178
+ - Do **not** output JSON or code blocks unless specifically required.
179
+ - Use a tone that's **professional, helpful, and non-technical**.
180
+ - Help the reader understand what needs fixing and why it matters for their website and users.
181
+
182
+ Example phrasing:
183
+ > "Your site currently loads in about 3.2 seconds for most users, which is considered average. Improving this can reduce bounce rates and improve conversions."
184
+
185
+ Be specific and practical. Use values directly from `{{pagespeed_data}}` such as `numeric_value`, `percentile`, and `category` fields.
186
+
187
+ ### PageSpeed Data:
188
+ {json.dumps(pagespeed_data, indent=2)}
189
+ """
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
 
193
  def analyze_url(self, url: str) -> Dict[str, Any]:
 
267
  2. **Measurable Target:** Include the numeric goal (e.g., "Reduce LCP to ≀1200β€―ms").
268
  3. **Resource Context:** Embed the resource URL or file name when relevant.
269
  4. **Expected Savings:** Append expected savings in seconds (from `metric_savings_ms`).
270
+ 5. **Code Snippet:** Provide a ready‑to‑copy snippet if applicable (e.g., `<img loading="lazy" src=...>`).
271
+ 6. **Category Tag:** Prefix with optimization domain `[Image]`, `[CSS]`, `[JS]`, `[Server]`.
272
+ 7. **Platform Tip:** If known, include stack‑specific advice (e.g., Next.js `next/image`).
273
+ 8. **Priority Classification:**
 
 
274
  - High: Savings β‰₯ 1.5 seconds or score < 0.25
275
  - Medium: Savings between 0.5 and 1.49 seconds or score 0.25 to 0.50
276
  - Low: Savings < 0.5 seconds or score between 0.51 and 1.0
277
  - Unknown: No savings or score data available
278
+ 9. Explain in easy english, avoiding technical jargon and explaination for technical terms.
279
 
280
  Important:
281
  - Respond with *only* a valid JSON object.
app/rag/chat_history.py CHANGED
@@ -1,10 +1,12 @@
 
1
  import time
2
  from typing import List, Dict, Any
3
  from pymongo import ReturnDocument
4
 
5
- from app.config import settings
6
  from .db import mongo_client, chat_collection_name
7
  from .embeddings import get_llm
 
8
  from langchain.prompts import ChatPromptTemplate
9
  from .logging_config import logger
10
 
@@ -78,3 +80,22 @@ class ChatHistoryManager:
78
  )
79
  logger.info("Summarized chat %s down to one message", chat_id)
80
  return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
  import time
3
  from typing import List, Dict, Any
4
  from pymongo import ReturnDocument
5
 
6
+ from app.page_speed.config import settings
7
  from .db import mongo_client, chat_collection_name
8
  from .embeddings import get_llm
9
+ from .utils import get_vectorstore_path # make sure this util is available
10
  from langchain.prompts import ChatPromptTemplate
11
  from .logging_config import logger
12
 
 
80
  )
81
  logger.info("Summarized chat %s down to one message", chat_id)
82
  return True
83
+
84
    @staticmethod
    def vectorstore_exists(user_id: str) -> bool:
        """
        Check if a vectorstore directory already exists for this user.

        Args:
            user_id: Identifier used by get_vectorstore_path to build the
                per-user vectorstore directory path.

        Returns:
            bool: True if that directory exists on disk.
        """
        path = get_vectorstore_path(user_id)
        # os.path.isdir (not exists): only a directory counts as a store.
        exists = os.path.isdir(path)
        logger.debug("Vectorstore path %s exists: %s", path, exists)
        return exists
93
+
94
    @staticmethod
    def chat_exists(chat_id: str) -> bool:
        """
        Check if a chat session already exists in Mongo for this chat_id.

        Args:
            chat_id: Session identifier stored in the "session_id" field.

        Returns:
            bool: True if at least one matching document exists.
        """
        # NOTE(review): `coll` is not among this module's visible imports
        # (only mongo_client and chat_collection_name are imported from .db) —
        # confirm `coll` is defined at module level, otherwise this raises
        # NameError at runtime.
        # limit=1 short-circuits the count after the first match.
        found = coll.count_documents({"session_id": chat_id}, limit=1) > 0
        logger.debug("Chat session %s exists: %s", chat_id, found)
        return found
app/rag/db.py CHANGED
@@ -1,5 +1,5 @@
1
  from pymongo import MongoClient
2
- from app.config import settings
3
 
4
  # ──────────────────────────────────────────────────────────────────────────────
5
  # MongoDB Initialization
 
1
  from pymongo import MongoClient
2
+ from app.page_speed.config import settings
3
 
4
  # ──────────────────────────────────────────────────────────────────────────────
5
  # MongoDB Initialization
app/rag/embeddings.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain.prompts import ChatPromptTemplate
5
  from dotenv import load_dotenv
6
 
7
  load_dotenv() # now os.getenv(...) will pick up values from your .env file
 
1
  import os
2
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
4
  from dotenv import load_dotenv
5
 
6
  load_dotenv() # now os.getenv(...) will pick up values from your .env file
app/rag/prompt_library.py CHANGED
@@ -1,7 +1,5 @@
1
  from langchain.prompts import ChatPromptTemplate
2
 
3
-
4
-
5
  # ──────────────────────────────────────────────────────────────────────────────
6
  # 1. Prompt Template for PAGE Speed Insights RAG Chatbot
7
  # ──────────────────────────────────────────────────────────────────────────────
 
1
  from langchain.prompts import ChatPromptTemplate
2
 
 
 
3
  # ──────────────────────────────────────────────────────────────────────────────
4
  # 1. Prompt Template for PAGE Speed Insights RAG Chatbot
5
  # ──────────────────────────────────────────────────────────────────────────────
app/rag/routes.py CHANGED
@@ -1,125 +1,117 @@
1
  import os
2
  import uuid
3
  from fastapi import APIRouter, HTTPException
4
- from typing import Optional
5
 
6
- from .schemas import (
7
- IngestRequest,
8
- IngestResponse,
9
- CreateChatResponse,
10
- ChatRequest,
11
- ChatResponse
12
- )
13
  from .utils import (
14
  text_splitter,
15
  embeddings,
16
- get_vectorstore_path,
17
  save_vectorstore_to_disk,
18
  upsert_vectorstore_metadata,
19
- build_or_load_vectorstore,
20
- build_rag_chain,
21
- initialize_chat_history
22
  )
23
- from .logging_config import logger
24
-
25
  from .chat_history import ChatHistoryManager
26
  from .logging_config import logger
27
 
28
  router = APIRouter(prefix="/rag", tags=["rag"])
29
 
30
- @router.post("/ingest/{user_id}", response_model=IngestResponse)
31
- async def ingest_documents(user_id: str, body: IngestRequest):
32
  """
33
- Ingest a list of text documents into a FAISS vectorstore for this user.
34
- Steps:
35
- 1. Concatenate all documents into one string.
36
- 2. Split into chunks using RecursiveCharacterTextSplitter.
37
- 3. Create a FAISS vectorstore from those chunks.
38
- 4. Save the vectorstore to disk under ./vectorstores/{user_id}/faiss_index.
39
- 5. Upsert metadata in Mongo (user_id -> vectorstore_path).
40
  """
41
- logger.info("Ingestion requested for user_id=%s. Number of docs=%d", user_id, len(body.documents))
42
- try:
43
- # 1. Join all provided documents
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  all_text = "\n\n".join(body.documents)
45
-
46
- # 2. Split into chunks
47
  text_chunks = text_splitter.split_text(all_text)
48
- logger.info("Split into %d chunks", len(text_chunks))
49
-
50
- # 3. Build FAISS vectorstore
51
  from langchain.vectorstores import FAISS as _FAISS
52
  vs = _FAISS.from_texts(texts=text_chunks, embedding=embeddings)
53
-
54
- # 4. Save to disk
55
- faiss_path = save_vectorstore_to_disk(vs, user_id)
56
- logger.info("Saved FAISS index to %s", faiss_path)
57
-
58
- # 5. Upsert metadata
59
- upsert_vectorstore_metadata(user_id, faiss_path)
60
- logger.info("Upserted vectorstore metadata for user_id=%s", user_id)
61
-
62
- return IngestResponse(
63
- success=True,
64
- message="Vectorstore created successfully.",
65
- user_id=user_id,
66
- vectorstore_path=faiss_path
67
  )
68
- except Exception as e:
69
- logger.error("Error during ingestion for user_id=%s: %s", user_id, e, exc_info=True)
70
- raise HTTPException(status_code=500, detail=f"Ingestion failed: {e}")
71
 
72
- @router.post("/chat/create/{user_id}", response_model=CreateChatResponse)
73
- async def create_chat_session(user_id: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  """
75
- Create a new chat session for this user:
76
- - Generate a chat_id (UUID).
77
- - Initialize an empty MongoDBChatMessageHistory for that chat_id.
78
- - Return the chat_id so the client can use it in subsequent calls.
79
  """
80
- logger.info("Creating new chat session for user_id=%s", user_id)
81
- try:
82
- chat_id = str(uuid.uuid4())
83
-
84
- # Initialize chat history (this writes an empty session to Mongo)
85
- _ = initialize_chat_history(chat_id)
86
- logger.info("Created chat history in Mongo for chat_id=%s", chat_id)
87
-
88
- return CreateChatResponse(
89
- success=True,
90
- message="Chat session created.",
91
- user_id=user_id,
92
- chat_id=chat_id
93
  )
94
- except Exception as e:
95
- logger.error("Error creating chat for user_id=%s: %s", user_id, e, exc_info=True)
96
- raise HTTPException(status_code=500, detail=f"Failed to create chat session: {e}")
97
 
 
 
 
 
 
 
 
98
 
99
- @router.post("/chat/{user_id}/{chat_id}", response_model=ChatResponse)
100
- async def chat_with_user(user_id: str, chat_id: str, prompt_type:str, body: ChatRequest):
101
  question = body.question.strip()
102
- logger.info("Chat request user=%s chat=%s question=%s", user_id, chat_id, question)
103
 
104
  try:
105
- # 1) Ensure session exists
106
- ChatHistoryManager.create_session(chat_id)
107
-
108
- # 2) Summarize long histories
109
  ChatHistoryManager.summarize_if_needed(chat_id, threshold=10)
110
 
111
- # 3) Record the user message
112
  ChatHistoryManager.add_message(chat_id, role="human", content=question)
113
 
114
- # 4) Build and invoke the RAG chain
115
- chain = build_rag_chain(user_id, chat_id , prompt_type)
116
  history = ChatHistoryManager.get_messages(chat_id)
117
  result = chain.invoke({"question": question, "chat_history": history})
118
  answer = result.get("answer") or result.get("output_text")
119
  if not answer:
120
  raise Exception("No answer returned from chain")
121
 
122
- # 5) Record the AI response
123
  ChatHistoryManager.add_message(chat_id, role="ai", content=answer)
124
 
125
  return ChatResponse(
@@ -127,17 +119,17 @@ async def chat_with_user(user_id: str, chat_id: str, prompt_type:str, body: Chat
127
  answer=answer,
128
  error=None,
129
  chat_id=chat_id,
130
- user_id=user_id
131
  )
132
 
133
  except HTTPException:
134
  raise
135
  except Exception as e:
136
- logger.error("Error chatting user=%s chat=%s: %s", user_id, chat_id, e, exc_info=True)
137
  return ChatResponse(
138
  success=False,
139
  answer=None,
140
  error=str(e),
141
  chat_id=chat_id,
142
- user_id=user_id
143
  )
 
1
  import os
2
  import uuid
3
  from fastapi import APIRouter, HTTPException
 
4
 
5
+ from .schemas import SetupRequest, ChatRequest, SetupResponse, ChatResponse
 
 
 
 
 
 
6
  from .utils import (
7
  text_splitter,
8
  embeddings,
 
9
  save_vectorstore_to_disk,
10
  upsert_vectorstore_metadata,
11
+ get_vectorstore_path,
12
+ build_rag_chain
 
13
  )
 
 
14
  from .chat_history import ChatHistoryManager
15
  from .logging_config import logger
16
 
17
  router = APIRouter(prefix="/rag", tags=["rag"])
18
 
19
+ @router.post("/initialization/{onboarding_id}", response_model=SetupResponse)
20
+ async def setup_rag_session(onboarding_id: str, body: SetupRequest):
21
  """
22
+ Single endpoint to ingest documents and create a chat session.
23
+ - If vectorstore exists for user_id, skip ingestion.
24
+ - Always create a new chat_id for this session.
 
 
 
 
25
  """
26
+ # 1. Handle vectorstore existence
27
+ vectorstore_path = get_vectorstore_path(onboarding_id)
28
+ if os.path.isdir(vectorstore_path):
29
+ logger.info(
30
+ "Vectorstore exists for onboarding_id=%s at %s; skipping ingestion",
31
+ onboarding_id, vectorstore_path
32
+ )
33
+ vs_path = vectorstore_path
34
+ else:
35
+ if not body.documents:
36
+ logger.error(
37
+ "Vectorstore missing for onboarding_id=%s and no documents provided", onboarding_id
38
+ )
39
+ raise HTTPException(
40
+ status_code=400,
41
+ detail="Vectorstore does not exist; please provide documents to ingest."
42
+ )
43
+ # Ingest new vectorstore
44
  all_text = "\n\n".join(body.documents)
 
 
45
  text_chunks = text_splitter.split_text(all_text)
46
+ logger.info("Split into %d chunks for ingestion", len(text_chunks))
 
 
47
  from langchain.vectorstores import FAISS as _FAISS
48
  vs = _FAISS.from_texts(texts=text_chunks, embedding=embeddings)
49
+ vs_path = save_vectorstore_to_disk(vs, onboarding_id)
50
+ logger.info("Saved FAISS index to %s", vs_path)
51
+ upsert_vectorstore_metadata(onboarding_id, vs_path)
52
+ logger.info(
53
+ "Upserted vectorstore metadata for onboarding_id=%s", onboarding_id
 
 
 
 
 
 
 
 
 
54
  )
 
 
 
55
 
56
+ # Create new chat session
57
+ chat_id = str(uuid.uuid4())
58
+ ChatHistoryManager.create_session(chat_id)
59
+ logger.info(
60
+ "Created new chat session %s for onboarding_id=%s",
61
+ chat_id, onboarding_id
62
+ )
63
+
64
+ return SetupResponse(
65
+ success=True,
66
+ message="RAG setup completed.",
67
+ onboarding_id=onboarding_id,
68
+ chat_id=chat_id,
69
+ vectorstore_path=vs_path
70
+ )
71
+
72
+ @router.post("/chat/{onboarding_id}/{chat_id}", response_model=ChatResponse)
73
+ async def chat_with_user(onboarding_id: str, chat_id: str, prompt_type: str, body: ChatRequest):
74
  """
75
+ Chat endpoint that uses an existing chat session and vectorstore.
76
+ - Validates that the vectorstore exists for onboarding_id.
77
+ - Validates that the chat session exists.
 
78
  """
79
+ # 0. Validate vectorstore
80
+ vectorstore_path = get_vectorstore_path(onboarding_id)
81
+ if not os.path.isdir(vectorstore_path):
82
+ logger.error("Vectorstore not found for onboarding_id=%s", onboarding_id)
83
+ raise HTTPException(
84
+ status_code=400,
85
+ detail="Vectorstore not found for this onboarding_id. Please run /setup first."
 
 
 
 
 
 
86
  )
 
 
 
87
 
88
+ # 1. Ensure chat session exists
89
+ if not ChatHistoryManager.chat_exists(chat_id):
90
+ logger.error("Chat session %s not found for onboarding_id=%s", chat_id, onboarding_id)
91
+ raise HTTPException(
92
+ status_code=404,
93
+ detail=f"Chat session {chat_id} does not exist."
94
+ )
95
 
 
 
96
  question = body.question.strip()
97
+ logger.info("Chat request onboarding_id=%s chat=%s question=%s", onboarding_id, chat_id, question)
98
 
99
  try:
100
+ # Summarize long histories
 
 
 
101
  ChatHistoryManager.summarize_if_needed(chat_id, threshold=10)
102
 
103
+ # Record the user message
104
  ChatHistoryManager.add_message(chat_id, role="human", content=question)
105
 
106
+ # Build and invoke the RAG chain
107
+ chain = build_rag_chain(onboarding_id, chat_id, prompt_type)
108
  history = ChatHistoryManager.get_messages(chat_id)
109
  result = chain.invoke({"question": question, "chat_history": history})
110
  answer = result.get("answer") or result.get("output_text")
111
  if not answer:
112
  raise Exception("No answer returned from chain")
113
 
114
+ # Record the AI response
115
  ChatHistoryManager.add_message(chat_id, role="ai", content=answer)
116
 
117
  return ChatResponse(
 
119
  answer=answer,
120
  error=None,
121
  chat_id=chat_id,
122
+ onboarding_id=onboarding_id
123
  )
124
 
125
  except HTTPException:
126
  raise
127
  except Exception as e:
128
+ logger.error("Error chatting onboarding_id=%s chat=%s: %s", onboarding_id, chat_id, e, exc_info=True)
129
  return ChatResponse(
130
  success=False,
131
  answer=None,
132
  error=str(e),
133
  chat_id=chat_id,
134
+ onboarding_id=onboarding_id
135
  )
app/rag/schemas.py CHANGED
@@ -1,33 +1,6 @@
1
  from pydantic import BaseModel, Field
2
  from typing import List, Optional
3
 
4
- class IngestRequest(BaseModel):
5
- """
6
- Request body for ingesting documents into a user's FAISS vector store.
7
- """
8
- documents: List[str] = Field(
9
- ...,
10
- description="A list of text documents (strings) to ingest into the vector store."
11
- )
12
-
13
- class IngestResponse(BaseModel):
14
- """
15
- Response after ingesting documents for a user.
16
- """
17
- success: bool
18
- message: str
19
- user_id: str
20
- vectorstore_path: Optional[str] = None
21
-
22
- class CreateChatResponse(BaseModel):
23
- """
24
- Response after creating a new chat session for a user.
25
- """
26
- success: bool
27
- message: str
28
- user_id: str
29
- chat_id: Optional[str] = None
30
-
31
  class ChatRequest(BaseModel):
32
  """
33
  Body for sending a user message to an existing chat session.
@@ -42,4 +15,14 @@ class ChatResponse(BaseModel):
42
  answer: Optional[str] = None
43
  error: Optional[str] = None
44
  chat_id: str
45
- user_id: str
 
 
 
 
 
 
 
 
 
 
 
1
  from pydantic import BaseModel, Field
2
  from typing import List, Optional
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  class ChatRequest(BaseModel):
5
  """
6
  Body for sending a user message to an existing chat session.
 
15
  answer: Optional[str] = None
16
  error: Optional[str] = None
17
  chat_id: str
18
+ onboarding_id: str
19
+
20
+ class SetupRequest(BaseModel):
21
+ documents: List[str]
22
+
23
+ class SetupResponse(BaseModel):
24
+ success: bool
25
+ message: str
26
+ onboarding_id: str
27
+ chat_id: str
28
+ vectorstore_path: str
app/rag/utils.py CHANGED
@@ -7,7 +7,7 @@ from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
7
  from langchain.memory import ConversationBufferMemory # ← IMPORT THIS
8
  from langchain.chains import ConversationalRetrievalChain
9
 
10
- from app.config import settings
11
  from .db import vectorstore_meta_coll, chat_collection_name
12
  from .embeddings import embeddings, text_splitter, get_llm
13
  from .logging_config import logger
@@ -23,7 +23,7 @@ def get_vectorstore_path(user_id: str) -> str:
23
  """
24
  base_dir = settings.vectorstore_base_path
25
  user_dir = os.path.join(base_dir, user_id)
26
- os.makedirs(user_dir, exist_ok=True)
27
  return user_dir
28
 
29
  # ──────────────────────────────────────────────────────────────────────────────
 
7
  from langchain.memory import ConversationBufferMemory # ← IMPORT THIS
8
  from langchain.chains import ConversationalRetrievalChain
9
 
10
+ from app.page_speed.config import settings
11
  from .db import vectorstore_meta_coll, chat_collection_name
12
  from .embeddings import embeddings, text_splitter, get_llm
13
  from .logging_config import logger
 
23
  """
24
  base_dir = settings.vectorstore_base_path
25
  user_dir = os.path.join(base_dir, user_id)
26
+ # os.makedirs(user_dir, exist_ok=True)
27
  return user_dir
28
 
29
  # ──────────────────────────────────────────────────────────────────────────────
app/seo/__init__.py ADDED
File without changes
app/seo/models.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import Any, Dict
3
+
4
+ class SEORequest(BaseModel):
5
+ seo_data: Dict[str, Any]
app/seo/routes.py CHANGED
@@ -1,38 +1,27 @@
1
  from fastapi import APIRouter, HTTPException
2
- from pydantic import BaseModel
3
- from typing import Any, Dict
4
  from .seo_service import SEOService
5
-
6
 
7
  router = APIRouter(prefix="/seo", tags=["SEO"])
8
 
9
  seo_service = SEOService()
10
 
11
-
12
- class SEORequest(BaseModel):
13
- seo_data: Dict[str, Any]
14
-
15
- class SEOPriorityRequest(BaseModel):
16
- report: str
17
-
18
- @router.post("/generate-report")
19
- def generate_seo_report(request: SEORequest):
20
  """
21
- Generate SEO report via Gemini.
22
  """
23
  try:
 
24
  report = seo_service.generate_seo_report(request.seo_data)
25
- return {"success": True, "report": report}
26
- except Exception as e:
27
- raise HTTPException(status_code=500, detail=str(e))
28
-
29
- @router.post("/generate-priority")
30
- def generate_seo_priority(request: SEOPriorityRequest):
31
- """
32
- Generate prioritized SEO suggestions from the report.
33
- """
34
- try:
35
- priority_suggestions = seo_service.generate_seo_priority(request.report)
36
- return {"success": True, "priority_suggestions": priority_suggestions}
37
  except Exception as e:
38
  raise HTTPException(status_code=500, detail=str(e))
 
1
  from fastapi import APIRouter, HTTPException
 
 
2
  from .seo_service import SEOService
3
+ from .models import SEORequest
4
 
5
  router = APIRouter(prefix="/seo", tags=["SEO"])
6
 
7
  seo_service = SEOService()
8
 
9
+ @router.post("/generate-full-report")
10
+ def generate_full_seo_analysis(request: SEORequest):
 
 
 
 
 
 
 
11
  """
12
+ Generate full SEO analysis: report + prioritized suggestions.
13
  """
14
  try:
15
+ # Step 1: Generate SEO report (as a string)
16
  report = seo_service.generate_seo_report(request.seo_data)
17
+
18
+ # Step 2: Generate prioritized SEO suggestions from the report
19
+ priority_suggestions = seo_service.generate_seo_priority(report)
20
+
21
+ return {
22
+ "success": True,
23
+ "report": report,
24
+ "priority_suggestions": priority_suggestions
25
+ }
 
 
 
26
  except Exception as e:
27
  raise HTTPException(status_code=500, detail=str(e))
app/seo/seo_service.py CHANGED
@@ -6,15 +6,11 @@ import requests
6
  import logging
7
  import google.generativeai as genai
8
  from typing import Dict, Any
9
- from app.config import settings
10
 
11
  # Create a module-level logger
12
  glogger = logging.getLogger(__name__)
13
 
14
-
15
-
16
-
17
-
18
  class SEOService:
19
  """
20
  Service class for generating SEO reports via Gemini.
@@ -67,127 +63,105 @@ class SEOService:
67
  Build the advanced prompt for SEO analysis based on the updated specialized template.
68
  """
69
  return f"""
70
- You are an **Expert SEO Consultant** with deep expertise in on‑page, technical, and off‑page SEO.
71
-
72
- The following JSON `{{SEO_DATA}}` contains exactly these keys (all required):
73
-
74
- {json.dumps(seo_data, indent=2)}
75
-
76
- Your task is to output **exactly** the following JSON reportβ€”no additional text, no extra keys, no commentary:
77
 
78
- ```json
79
- {{
80
- "overall_score": integer,
81
- "grade": "A"|"B"|"C"|"D"|"F",
82
- "top_strengths": [string],
83
- "top_issues": [string],
84
- "metrics": [
85
- {{
86
- "name": string,
87
- "value": string|number|boolean|array,
88
- "benchmark": string,
89
- "score": integer,
90
- "status": "good"|"needs_improvement"|"critical",
91
- "why_it_matters": string,
92
- "recommendation": string
93
- }}
94
- ],
95
- "action_plan": [
96
- {{
97
- "metric": string,
98
- "fix": string,
99
- "effort_level": "low"|"medium"|"high"
100
- }}
101
- ],
102
- "monitoring": {{
103
- "frequency": string,
104
- "methods": [string]
105
- }},
106
- "technical_seo": "data_unavailable" | {{
107
- "core_web_vitals": {{
108
- "LCP": string,
109
- "FID": string,
110
- "CLS": string
111
- }},
112
- "page_speed_score": integer,
113
- "lazy_loading": boolean,
114
- "security_headers": [string]
115
- }},
116
- "schema_markup": "data_unavailable" | {{
117
- "structured_data_types": [string],
118
- "valid": boolean
119
- }},
120
- "backlink_profile": "data_unavailable" | {{
121
- "referring_domains": integer,
122
- "toxic_links": integer,
123
- "recommendations": string
124
- }},
125
- "trend_comparison": "data_unavailable" | {{
126
- "previous_score": integer,
127
- "change": "increase"|"decrease"|"no_change",
128
- "comment": string
129
- }}
130
- }}
131
 
132
- Instructions:
133
 
134
- Do not include any text before or after the JSON.
135
 
136
- Evaluate SEO performance holistically across all provided data:
 
 
 
 
137
 
138
- On‑Page SEO (titles, meta, headings, content, images, links)
139
 
140
- Technical SEO (robots.txt, sitemap.xml, indexability, mobile‑friendly, HTTPS, URL structure)
 
 
 
 
 
 
 
 
141
 
142
- Off‑Page SEO (backlink_profile)
143
 
144
- Use deterministic scoring based on internal benchmarks:
 
 
 
 
145
 
146
- SEO Score: ≀50=critical, 51–70=needs_improvement, >70=good
147
 
148
- Meta Title length: 50–60 chars=good, <50 or >60=needs_improvement
 
 
149
 
150
- H1 Tags: exactly 1=good, >1=needs_improvement, 0=critical
151
 
152
- Heading Structure errors: any=critical
 
 
 
 
 
153
 
154
- Image Alt Tags ratio: β‰₯90% good, 50–89% needs_improvement, <50% critical
155
 
156
- sitemapXmlCheck: missing=critical
157
 
158
- robotsTxtCheck: missing=critical
 
 
 
 
159
 
160
- indexabilityCheck: false=critical
161
 
162
- internalLinksCount: <5=needs_improvement
 
 
 
 
163
 
164
- externalLinksCount: <2=needs_improvement
165
 
166
- Advanced sections (technical_seo, schema_markup, backlink_profile, trend_comparison):
 
 
 
 
167
 
168
- If the input data lacks these metrics, set the field value to "data_unavailable".
169
 
170
- Otherwise, populate with real values (e.g., core web vitals, page speed score, backlink counts).
171
 
172
- The action_plan must list the 5 weakest metrics by score, across all sections.
 
 
 
 
 
 
 
 
173
 
174
- Set "monitoring.frequency" to:
 
 
 
 
 
175
 
176
- "weekly" if any metric status is "critical" or "needs_improvement".
 
177
 
178
- "monthly" if all metrics are "good".
179
-
180
- Grading scale:
181
-
182
- 90–100: A
183
-
184
- 80–89: B
185
-
186
- 70–79: C
187
-
188
- 60–69: D
189
-
190
- <60: F
191
  """
192
 
193
  def generate_seo_priority(self, report: str) -> Dict[str, Any]:
@@ -233,19 +207,19 @@ Classification Rules:
233
  2. **Benchmark Comparison:** Include both the **current value** and the **ideal benchmark**
234
  (e.g. `"Current: 15 keywords, Ideal: 1–3% density"`).
235
  3. **Impact Estimate:** Quantify expected SEO impact (e.g. `"+12% CTR"` or `"+0.5 page rank score"`).
236
- 4. **Effort Estimate:** Add an effort estimate (e.g. `"Effort: Low (β‰ˆ1 hr)"`).
237
- 5. **Code Snippet:** Provide a ready‑to‑copy example if applicable
238
  (e.g. `<meta name="description" content="...">`).
239
- 6. **Category Tag:** Prefix with SEO domainβ€”
240
  `[On-Page]`, `[Technical]`, `[Off-Page]`, `[Local]`, `[Schema]`.
241
- 7. **Impact Score:** Append a simple impact rating (e.g. `"Impact: β­β­β­β˜†β˜†"`).
242
- 8. **Platform Tip:** If applicable, include CMS or framework advice
243
  (e.g. `"WordPress: use Yoast SEO"`, `"Next.js: use next/head"`).
244
- 9. **Priority Classification:**
245
  - **High:** Any metric with score `"critical"` or <β€―60, or impact β‰₯β€―10%.
246
  - **Medium:** Score 60–79 or impact 5–9%.
247
  - **Low:** Score 80–100 or impact <β€―5%.
248
  - **Unknown:** No score or impact data available.
 
 
249
 
250
  Important:
251
  - Respond with *only* a valid JSON object.
 
6
  import logging
7
  import google.generativeai as genai
8
  from typing import Dict, Any
9
+ from app.page_speed.config import settings
10
 
11
  # Create a module-level logger
12
  glogger = logging.getLogger(__name__)
13
 
 
 
 
 
14
  class SEOService:
15
  """
16
  Service class for generating SEO reports via Gemini.
 
63
  Build the advanced prompt for SEO analysis based on the updated specialized template.
64
  """
65
  return f"""
66
+ You are an **Expert SEO Consultant** with advanced knowledge of on-page, technical, and off-page SEO.
 
 
 
 
 
 
67
 
68
+ Your task is to analyze this data and return a detailed SEO audit report as a **multi-line string** (not as JSON). Keep it structured, clear, and easy to read β€” for example, using sections, bullet points, and indentation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ Include these sections in your output:
71
 
72
+ ---
73
 
74
+ **Overall Summary**
75
+ - Overall SEO Score: (0–100)
76
+ - Grade: A, B, C, D, or F
77
+ - Top Strengths: List the top 3–5 strong areas
78
+ - Top Issues: List the top 3–5 weak/problematic areas
79
 
80
+ ---
81
 
82
+ **Metric Breakdown**
83
+ For each key metric in the data:
84
+ - Metric Name
85
+ - Value: ...
86
+ - Benchmark: ...
87
+ - Score: ...
88
+ - Status: good / needs improvement / critical
89
+ - Why It Matters: Explain simply
90
+ - Recommendation: What to fix or improve
91
 
92
+ ---
93
 
94
+ **Action Plan**
95
+ List 5 weakest metrics and how to fix them:
96
+ - Metric: ...
97
+ - Fix: ...
98
+ - Effort Level: low / medium / high
99
 
100
+ ---
101
 
102
+ **Monitoring Strategy**
103
+ - Frequency: weekly or monthly (based on severity of issues)
104
+ - Methods: Tools or techniques to track progress
105
 
106
+ ---
107
 
108
+ **Technical SEO**
109
+ If data is available, include:
110
+ - Core Web Vitals (LCP, FID, CLS)
111
+ - Page Speed Score
112
+ - Lazy Loading Enabled
113
+ - Security Headers Present
114
 
115
+ If not available, just write β€œTechnical SEO data not available.”
116
 
117
+ ---
118
 
119
+ **Schema Markup**
120
+ If available:
121
+ - Types Detected
122
+ - Is Valid: Yes/No
123
+ Else: β€œSchema markup data not available.”
124
 
125
+ ---
126
 
127
+ **Backlink Profile**
128
+ If available:
129
+ - Referring Domains
130
+ - Toxic Links
131
+ - Recommendations to improve off-page SEO
132
 
133
+ ---
134
 
135
+ **Trend Comparison**
136
+ If available:
137
+ - Previous Score
138
+ - Score Change (increase, decrease, or no change)
139
+ - Comment
140
 
141
+ ---
142
 
143
+ ### βš™οΈ Scoring Rules Summary (for reference):
144
 
145
+ - SEO Score: ≀50 = critical, 51–70 = needs improvement, >70 = good
146
+ - Meta Title: 50–60 chars = good, else needs improvement
147
+ - H1 Tags: exactly 1 = good, 0 or >1 = needs improvement/critical
148
+ - Heading Errors: any = critical
149
+ - Image Alt Tags: β‰₯90% = good, 50–89% = needs improvement, <50% = critical
150
+ - sitemapXmlCheck / robotsTxtCheck: missing = critical
151
+ - indexabilityCheck: false = critical
152
+ - internalLinksCount: <5 = needs improvement
153
+ - externalLinksCount: <2 = needs improvement
154
 
155
+ Use these rules to calculate metric status and overall grade:
156
+ - 90–100 β†’ A
157
+ - 80–89 β†’ B
158
+ - 70–79 β†’ C
159
+ - 60–69 β†’ D
160
+ - <60 β†’ F
161
 
162
+ SEO data provided in JSON format:
163
+ {seo_data}
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  """
166
 
167
  def generate_seo_priority(self, report: str) -> Dict[str, Any]:
 
207
  2. **Benchmark Comparison:** Include both the **current value** and the **ideal benchmark**
208
  (e.g. `"Current: 15 keywords, Ideal: 1–3% density"`).
209
  3. **Impact Estimate:** Quantify expected SEO impact (e.g. `"+12% CTR"` or `"+0.5 page rank score"`).
210
+ 4. **Code Snippet:** Provide a ready‑to‑copy example if applicable
 
211
  (e.g. `<meta name="description" content="...">`).
212
+ 5. **Category Tag:** Prefix with SEO domainβ€”
213
  `[On-Page]`, `[Technical]`, `[Off-Page]`, `[Local]`, `[Schema]`.
214
+ 6. **Platform Tip:** If applicable, include CMS or framework advice
 
215
  (e.g. `"WordPress: use Yoast SEO"`, `"Next.js: use next/head"`).
216
+ 7. **Priority Classification:**
217
  - **High:** Any metric with score `"critical"` or <β€―60, or impact β‰₯β€―10%.
218
  - **Medium:** Score 60–79 or impact 5–9%.
219
  - **Low:** Score 80–100 or impact <β€―5%.
220
  - **Unknown:** No score or impact data available.
221
+ 8. Explain in easy english, avoiding technical jargon and explaination for technical terms.
222
+
223
 
224
  Important:
225
  - Respond with *only* a valid JSON object.