Hammad712 committed on
Commit
54c31ea
·
1 Parent(s): b1f7307

Added SEO report generation and updated prompts

Browse files
.gitignore CHANGED
@@ -4,6 +4,7 @@ __pycache__/
4
  *$py.class
5
 
6
  # Virtual environment
 
7
  venv/
8
  env/
9
  .myenv/
 
4
  *$py.class
5
 
6
  # Virtual environment
7
+ .venv/
8
  venv/
9
  env/
10
  .myenv/
app/main.py CHANGED
@@ -22,6 +22,8 @@ from app.models import (
22
  )
23
  from app.services import PageSpeedService
24
  from app.rag.routes import router as rag_router
 
 
25
 
26
  # ------------------------
27
  # Configure root logger
@@ -63,6 +65,9 @@ app = FastAPI(
63
  # Mount RAG router
64
  app.include_router(rag_router)
65
 
 
 
 
66
  # Add CORS middleware
67
  app.add_middleware(
68
  CORSMiddleware,
 
22
  )
23
  from app.services import PageSpeedService
24
  from app.rag.routes import router as rag_router
25
+ from app.seo import routes as seo_routes
26
+
27
 
28
  # ------------------------
29
  # Configure root logger
 
65
  # Mount RAG router
66
  app.include_router(rag_router)
67
 
68
+ app.include_router(seo_routes.router)
69
+
70
+
71
  # Add CORS middleware
72
  app.add_middleware(
73
  CORSMiddleware,
app/rag/embeddings.py CHANGED
@@ -44,32 +44,3 @@ encode_kwargs = {"normalize_embeddings": True}
44
  embeddings = HuggingFaceBgeEmbeddings(
45
  model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
46
  )
47
- # ──────────────────────────────────────────────────────────────────────────────
48
- # 3. Prompt Template for RAG Assistant
49
- # ──────────────────────────────────────────────────────────────────────────────
50
- prompt_template = """
51
- You are an assistant specialized in analyzing and improving website performance. Your goal is to provide accurate, practical, and performance-driven answers.
52
- Use the following retrieved context (such as PageSpeed Insights data or audit results) to answer the user's question.
53
- If the context lacks sufficient information, respond with "I don't know." Do not make up answers or provide unverified information.
54
-
55
- Guidelines:
56
- 1. Extract relevant performance insights from the context to form a helpful and actionable response.
57
- 2. Maintain a clear, professional, and user-focused tone.
58
- 3. If the question is unclear or needs more detail, ask for clarification politely.
59
- 4. Prioritize recommendations that follow web performance best practices (e.g., optimizing load times, reducing blocking resources, improving visual stability).
60
-
61
- Retrieved context:
62
- {context}
63
-
64
- User's question:
65
- {question}
66
-
67
- Your response:
68
- """
69
-
70
- user_prompt = ChatPromptTemplate.from_messages(
71
- [
72
- ("system", prompt_template),
73
- ("human", "{question}"),
74
- ]
75
- )
 
44
  embeddings = HuggingFaceBgeEmbeddings(
45
  model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
46
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/rag/prompt_library.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts import ChatPromptTemplate
2
+
3
+
4
+
5
# ──────────────────────────────────────────────────────────────────────────────
# Prompt library for the RAG chatbots.
#
# Each template below is used as the *system* message of a ChatPromptTemplate
# and exposes the ``{context}`` and ``{question}`` placeholders; the *human*
# message is always just ``{question}``.
# ──────────────────────────────────────────────────────────────────────────────


def _build_rag_prompt(system_template: str) -> ChatPromptTemplate:
    """Wrap *system_template* in the system/human message pair shared by every prompt here."""
    return ChatPromptTemplate.from_messages(
        [
            ("system", system_template),
            ("human", "{question}"),
        ]
    )


# ──────────────────────────────────────────────────────────────────────────────
# 1. Prompt Template for PageSpeed Insights RAG Chatbot
# ──────────────────────────────────────────────────────────────────────────────
page_speed_prompt_template = """
You are an assistant specialized in analyzing and improving website performance. Your goal is to provide accurate, practical, and performance-driven answers.
Use the following retrieved context (such as PageSpeed Insights data or audit results) to answer the user's question.
If the context lacks sufficient information, respond with "I don't know." Do not make up answers or provide unverified information.

Guidelines:
1. Extract relevant performance insights from the context to form a helpful and actionable response.
2. Maintain a clear, professional, and user-focused tone.
3. If the question is unclear or needs more detail, ask for clarification politely.
4. Prioritize recommendations that follow web performance best practices (e.g., optimizing load times, reducing blocking resources, improving visual stability).

Retrieved context:
{context}

User's question:
{question}

Your response:
"""

# Backward-compatible alias: this template was originally exported under the
# generic name ``prompt_template``; the sibling templates use a
# ``<purpose>_prompt_template`` naming scheme.
prompt_template = page_speed_prompt_template

page_speed_prompt = _build_rag_prompt(page_speed_prompt_template)


# ──────────────────────────────────────────────────────────────────────────────
# 2. Prompt Template for Default RAG Chatbot
# ──────────────────────────────────────────────────────────────────────────────
default_user_prompt_template = """You are an assistant specialized in answering user questions based on the provided context.
Use the following retrieved context to answer the user's question.
If the context lacks sufficient information, respond with "I don't know."
Do not make up answers or provide unverified information.
Retrieved context:
{context}
User's question:
{question}
Your response:
"""
default_user_prompt = _build_rag_prompt(default_user_prompt_template)

# ──────────────────────────────────────────────────────────────────────────────
# 3. Prompt Template for SEO RAG Chatbot
# ──────────────────────────────────────────────────────────────────────────────
seo_prompt_template = """You are an SEO assistant specialized in analyzing and improving website search engine optimization.
Use the following retrieved context to answer the user's question.
If the context lacks sufficient information, respond with "I don't know."
Do not make up answers or provide unverified information.
Retrieved context:
{context}
User's question:
{question}
Your response:
"""
seo_prompt = _build_rag_prompt(seo_prompt_template)
app/rag/routes.py CHANGED
@@ -97,7 +97,7 @@ async def create_chat_session(user_id: str):
97
 
98
 
99
  @router.post("/chat/{user_id}/{chat_id}", response_model=ChatResponse)
100
- async def chat_with_user(user_id: str, chat_id: str, body: ChatRequest):
101
  question = body.question.strip()
102
  logger.info("Chat request user=%s chat=%s question=%s", user_id, chat_id, question)
103
 
@@ -112,7 +112,7 @@ async def chat_with_user(user_id: str, chat_id: str, body: ChatRequest):
112
  ChatHistoryManager.add_message(chat_id, role="human", content=question)
113
 
114
  # 4) Build and invoke the RAG chain
115
- chain = build_rag_chain(user_id, chat_id)
116
  history = ChatHistoryManager.get_messages(chat_id)
117
  result = chain.invoke({"question": question, "chat_history": history})
118
  answer = result.get("answer") or result.get("output_text")
 
97
 
98
 
99
  @router.post("/chat/{user_id}/{chat_id}", response_model=ChatResponse)
100
+ async def chat_with_user(user_id: str, chat_id: str, prompt_type:str, body: ChatRequest):
101
  question = body.question.strip()
102
  logger.info("Chat request user=%s chat=%s question=%s", user_id, chat_id, question)
103
 
 
112
  ChatHistoryManager.add_message(chat_id, role="human", content=question)
113
 
114
  # 4) Build and invoke the RAG chain
115
+ chain = build_rag_chain(user_id, chat_id , prompt_type)
116
  history = ChatHistoryManager.get_messages(chat_id)
117
  result = chain.invoke({"question": question, "chat_history": history})
118
  answer = result.get("answer") or result.get("output_text")
app/rag/utils.py CHANGED
@@ -9,8 +9,9 @@ from langchain.chains import ConversationalRetrievalChain
9
 
10
  from app.config import settings
11
  from .db import vectorstore_meta_coll, chat_collection_name
12
- from .embeddings import embeddings, text_splitter, user_prompt, get_llm
13
  from .logging_config import logger
 
14
 
15
  # ──────────────────────────────────────────────────────────────────────────────
16
  # 1. Helper: Path to Store (or Load) a User's FAISS Vectorstore on Disk
@@ -96,7 +97,7 @@ def initialize_chat_history(chat_id: str) -> MongoDBChatMessageHistory:
96
  # ──────────────────────────────────────────────────────────────────────────────
97
  # 6. Build a ConversationalRetrievalChain (RAG Chain) for user_id + chat_id
98
  # ──────────────────────────────────────────────────────────────────────────────
99
- def build_rag_chain(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
100
  """
101
  - Loads the FAISS index for user_id.
102
  - Creates a retriever (k=3).
@@ -123,6 +124,16 @@ def build_rag_chain(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
123
  # 4. Get the LLM
124
  llm = get_llm()
125
 
 
 
 
 
 
 
 
 
 
 
126
  # 5. Build the ConversationalRetrievalChain with the wrapped memory
127
  chain = ConversationalRetrievalChain.from_llm(
128
  llm=llm,
@@ -130,7 +141,7 @@ def build_rag_chain(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
130
  memory=memory, # ← pass the ConversationBufferMemory here
131
  return_source_documents=False,
132
  chain_type="stuff",
133
- combine_docs_chain_kwargs={"prompt": user_prompt},
134
  verbose=False,
135
  )
136
  return chain
 
9
 
10
  from app.config import settings
11
  from .db import vectorstore_meta_coll, chat_collection_name
12
+ from .embeddings import embeddings, text_splitter, get_llm
13
  from .logging_config import logger
14
+ from app.rag.prompt_library import page_speed_prompt, default_user_prompt,seo_prompt
15
 
16
  # ──────────────────────────────────────────────────────────────────────────────
17
  # 1. Helper: Path to Store (or Load) a User's FAISS Vectorstore on Disk
 
97
  # ──────────────────────────────────────────────────────────────────────────────
98
  # 6. Build a ConversationalRetrievalChain (RAG Chain) for user_id + chat_id
99
  # ──────────────────────────────────────────────────────────────────────────────
100
+ def build_rag_chain(user_id: str, chat_id: str, prompt_type: str) -> ConversationalRetrievalChain:
101
  """
102
  - Loads the FAISS index for user_id.
103
  - Creates a retriever (k=3).
 
124
  # 4. Get the LLM
125
  llm = get_llm()
126
 
127
+ if prompt_type == "page_speed":
128
+ # Use the specific prompt for Page Speed Insights
129
+ user_prompt = page_speed_prompt
130
+ elif prompt_type == "seo":
131
+ # Use the specific prompt for SEO
132
+ user_prompt = seo_prompt
133
+ else:
134
+ # Default to the user prompt if no specific type is provided
135
+ user_prompt = default_user_prompt
136
+
137
  # 5. Build the ConversationalRetrievalChain with the wrapped memory
138
  chain = ConversationalRetrievalChain.from_llm(
139
  llm=llm,
 
141
  memory=memory, # ← pass the ConversationBufferMemory here
142
  return_source_documents=False,
143
  chain_type="stuff",
144
+ combine_docs_chain_kwargs={"prompt": user_prompt}, # Use the user prompt for combining docs
145
  verbose=False,
146
  )
147
  return chain
app/seo/routes.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from pydantic import BaseModel
3
+ from typing import Any, Dict
4
+ from .seo_service import SEOService
5
+
6
+
7
+ router = APIRouter(prefix="/seo", tags=["SEO"])
8
+
9
+ seo_service = SEOService()
10
+
11
+
12
class SEORequest(BaseModel):
    """Request body for the ``/generate-report`` endpoint."""

    # JSON object of SEO metrics; forwarded as-is to SEOService.generate_seo_report.
    seo_data: Dict[str, Any]
14
+
15
class SEOPriorityRequest(BaseModel):
    """Request body for the ``/generate-priority`` endpoint."""

    # Report text; forwarded as-is to SEOService.generate_seo_priority.
    report: str
17
+
18
@router.post("/generate-report")
def generate_seo_report(request: SEORequest):
    """
    Generate SEO report via Gemini.

    Args:
        request: Body carrying the ``seo_data`` metrics object.

    Returns:
        ``{"success": True, "report": <report>}`` where ``<report>`` is the
        value returned by ``seo_service.generate_seo_report``.

    Raises:
        HTTPException: Status 500, with the stringified error as ``detail``,
            when the service call raises.
    """
    try:
        # Keep the try body to the single call that can actually fail.
        report = seo_service.generate_seo_report(request.seo_data)
    except Exception as exc:
        # Chain the cause so the original traceback is not lost.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"success": True, "report": report}
28
+
29
@router.post("/generate-priority")
def generate_seo_priority(request: SEOPriorityRequest):
    """
    Generate prioritized SEO suggestions from the report.

    Args:
        request: Body carrying the ``report`` text to prioritize.

    Returns:
        ``{"success": True, "priority_suggestions": <suggestions>}`` where
        ``<suggestions>`` is the value returned by
        ``seo_service.generate_seo_priority``.

    Raises:
        HTTPException: Status 500, with the stringified error as ``detail``,
            when the service call raises.
    """
    try:
        # Keep the try body to the single call that can actually fail.
        priority_suggestions = seo_service.generate_seo_priority(request.report)
    except Exception as exc:
        # Chain the cause so the original traceback is not lost.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return {"success": True, "priority_suggestions": priority_suggestions}
app/seo/seo_service.py ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Business logic services for SEO analysis.
3
+ """
4
+ import json
5
+ import requests
6
+ import logging
7
+ import google.generativeai as genai
8
+ from typing import Dict, Any
9
+ from app.config import settings
10
+
11
# Create a module-level logger
glogger = logging.getLogger(__name__)


class SEOService:
    """
    Service class for generating SEO reports via Gemini.

    The Gemini client is configured in ``__init__`` from
    ``settings.gemini_api_key``. When no key is set the instance is still
    created (a warning is logged), but both public methods raise when called.
    """

    # Gemini model used for both report generation and prioritization.
    _MODEL_NAME = "gemini-2.0-flash"

    # Keys that a priority-suggestion payload is guaranteed to contain.
    _PRIORITY_KEYS = ("high", "medium", "low", "unknown")

    def __init__(self):
        self.gemini_api_key = settings.gemini_api_key
        if self.gemini_api_key:
            glogger.info("Configuring Gemini AI for SEO reporting.")
            genai.configure(api_key=self.gemini_api_key)
        else:
            glogger.warning("No Gemini API key found. SEO reporting will fail if called.")

    def _require_api_key(self) -> None:
        """Log and raise ``RuntimeError`` when no Gemini API key is configured."""
        if not self.gemini_api_key:
            msg = "Gemini API key not configured"
            glogger.error(msg)
            raise RuntimeError(msg)

    def generate_seo_report(self, seo_data: Dict[str, Any]) -> str:
        """
        Generate an SEO audit report using Gemini AI.

        Args:
            seo_data (Dict[str, Any]): Collected SEO metrics; must be JSON-serializable
                because it is embedded in the prompt via ``json.dumps``.

        Returns:
            str: The model's response text with surrounding whitespace stripped.
                The prompt asks for a JSON report, but the text is returned
                as-is and is not parsed or validated here.

        Raises:
            RuntimeError: If the API key is missing or Gemini returns no text.
            Exception: Any error raised by the Gemini client is logged and re-raised.
        """
        glogger.info("Starting SEO report generation.")
        self._require_api_key()

        prompt = self._create_seo_prompt(seo_data)
        glogger.debug("SEO prompt: %s...", prompt[:200])

        try:
            model = genai.GenerativeModel(self._MODEL_NAME)
            response = model.generate_content(prompt)
            text = getattr(response, "text", None)
            if not text:
                raise RuntimeError("Empty response from Gemini")
        except Exception as e:
            glogger.exception("Error generating SEO report: %s", e)
            raise

        glogger.info("SEO report generated successfully.")
        return text.strip()

    def _create_seo_prompt(self, seo_data: Dict[str, Any]) -> str:
        """
        Build the advanced prompt for SEO analysis based on the updated specialized template.

        The metrics are embedded as pretty-printed JSON, followed by the report
        schema (inside a closed ```json fence) and the scoring instructions.
        """
        return f"""
You are an **Expert SEO Consultant** with deep expertise in on‑page, technical, and off‑page SEO.

The following JSON `{{SEO_DATA}}` contains exactly these keys (all required):

{json.dumps(seo_data, indent=2)}

Your task is to output **exactly** the following JSON report—no additional text, no extra keys, no commentary:

```json
{{
  "overall_score": integer,
  "grade": "A"|"B"|"C"|"D"|"F",
  "top_strengths": [string],
  "top_issues": [string],
  "metrics": [
    {{
      "name": string,
      "value": string|number|boolean|array,
      "benchmark": string,
      "score": integer,
      "status": "good"|"needs_improvement"|"critical",
      "why_it_matters": string,
      "recommendation": string
    }}
  ],
  "action_plan": [
    {{
      "metric": string,
      "fix": string,
      "effort_level": "low"|"medium"|"high"
    }}
  ],
  "monitoring": {{
    "frequency": string,
    "methods": [string]
  }},
  "technical_seo": "data_unavailable" | {{
    "core_web_vitals": {{
      "LCP": string,
      "FID": string,
      "CLS": string
    }},
    "page_speed_score": integer,
    "lazy_loading": boolean,
    "security_headers": [string]
  }},
  "schema_markup": "data_unavailable" | {{
    "structured_data_types": [string],
    "valid": boolean
  }},
  "backlink_profile": "data_unavailable" | {{
    "referring_domains": integer,
    "toxic_links": integer,
    "recommendations": string
  }},
  "trend_comparison": "data_unavailable" | {{
    "previous_score": integer,
    "change": "increase"|"decrease"|"no_change",
    "comment": string
  }}
}}
```

Instructions:

Do not include any text before or after the JSON.

Evaluate SEO performance holistically across all provided data:

On‑Page SEO (titles, meta, headings, content, images, links)

Technical SEO (robots.txt, sitemap.xml, indexability, mobile‑friendly, HTTPS, URL structure)

Off‑Page SEO (backlink_profile)

Use deterministic scoring based on internal benchmarks:

SEO Score: ≤50=critical, 51–70=needs_improvement, >70=good

Meta Title length: 50–60 chars=good, <50 or >60=needs_improvement

H1 Tags: exactly 1=good, >1=needs_improvement, 0=critical

Heading Structure errors: any=critical

Image Alt Tags ratio: ≥90% good, 50–89% needs_improvement, <50% critical

sitemapXmlCheck: missing=critical

robotsTxtCheck: missing=critical

indexabilityCheck: false=critical

internalLinksCount: <5=needs_improvement

externalLinksCount: <2=needs_improvement

Advanced sections (technical_seo, schema_markup, backlink_profile, trend_comparison):

If the input data lacks these metrics, set the field value to "data_unavailable".

Otherwise, populate with real values (e.g., core web vitals, page speed score, backlink counts).

The action_plan must list the 5 weakest metrics by score, across all sections.

Set "monitoring.frequency" to:

"weekly" if any metric status is "critical" or "needs_improvement".

"monthly" if all metrics are "good".

Grading scale:

90–100: A

80–89: B

70–79: C

60–69: D

<60: F
"""

    def _create_priority_prompt(self, report: str) -> str:
        """Build the prompt that asks Gemini to bucket the SEO report's recommendations by priority."""
        return f"""
You are an **Expert SEO Analyst & Optimization Engineer**.

Your task is to carefully analyze the provided SEO audit report.
Extract **all** optimization recommendations and organize them into a JSON object with exactly these keys:
- "high"
- "medium"
- "low"
- "unknown"

Each key’s value should be a list of suggestion strings.

Classification Rules:
1. **Metric Reference:** For each suggestion, cite the metric name and full JSON path
(e.g. `metrics[2].name == "Keyword Density"` or `metrics[6].value`).
2. **Benchmark Comparison:** Include both the **current value** and the **ideal benchmark**
(e.g. `"Current: 15 keywords, Ideal: 1–3% density"`).
3. **Impact Estimate:** Quantify expected SEO impact (e.g. `"+12% CTR"` or `"+0.5 page rank score"`).
4. **Effort Estimate:** Add an effort estimate (e.g. `"Effort: Low (≈1 hr)"`).
5. **Code Snippet:** Provide a ready‑to‑copy example if applicable
(e.g. `<meta name="description" content="...">`).
6. **Category Tag:** Prefix with SEO domain—
`[On-Page]`, `[Technical]`, `[Off-Page]`, `[Local]`, `[Schema]`.
7. **Impact Score:** Append a simple impact rating (e.g. `"Impact: ⭐⭐⭐☆☆"`).
8. **Platform Tip:** If applicable, include CMS or framework advice
(e.g. `"WordPress: use Yoast SEO"`, `"Next.js: use next/head"`).
9. **Priority Classification:**
- **High:** Any metric with status `"critical"` or score < 60, or impact ≥ 10%.
- **Medium:** Score 60–79 or impact 5–9%.
- **Low:** Score 80–100 or impact < 5%.
- **Unknown:** No score or impact data available.

Important:
- Respond with *only* a valid JSON object.
- Do NOT include any commentary or explanation outside the JSON.

SEO Report:
{report}
"""

    @staticmethod
    def _extract_json_text(raw: str) -> str:
        """
        Return the substring of *raw* from its first '{' to its last '}' (inclusive).

        This tolerates text or Markdown fences around the JSON object.

        Raises:
            ValueError: If *raw* contains no such '{' ... '}' span.
        """
        start = raw.find("{")
        end = raw.rfind("}")
        # end == -1 is covered by end <= start whenever start is valid.
        if start == -1 or end <= start:
            raise ValueError("No JSON object found in Gemini response")
        return raw[start:end + 1]

    def generate_seo_priority(self, report: str) -> Dict[str, Any]:
        """
        Generate a dictionary of prioritized SEO recommendations based on the Gemini-generated report.

        Args:
            report (str): The Gemini-generated SEO report

        Returns:
            Dict[str, Any]: The JSON object parsed from the model response. The keys
                "high", "medium", "low" and "unknown" are always present
                (defaulting to an empty list); any other keys the model
                returned are kept.

        Raises:
            RuntimeError: If the API key is missing, or the extracted text is not valid JSON.
            ValueError: If the response contains no JSON object.
            Exception: Any error raised by the Gemini client is logged and re-raised.
        """
        glogger.info("Generating prioritized suggestions from the Gemini report.")
        self._require_api_key()

        try:
            model = genai.GenerativeModel(self._MODEL_NAME)
            response = model.generate_content(self._create_priority_prompt(report))
            raw = (response.text or "").strip()
            glogger.debug("Raw priority response: %s", raw[:500] + ("…" if len(raw) > 500 else ""))

            json_str = self._extract_json_text(raw)
            glogger.debug("Extracted JSON string: %s", json_str)

            suggestions = json.loads(json_str)
            if not isinstance(suggestions, dict):
                raise ValueError("Parsed JSON is not a dictionary")

            # Ensure all expected keys exist
            for key in self._PRIORITY_KEYS:
                suggestions.setdefault(key, [])

            glogger.info("Priority suggestions generated successfully.")
            return suggestions

        except json.JSONDecodeError as je:
            msg = f"Failed to parse JSON from Gemini response: {je}"
            glogger.error(msg, exc_info=True)
            # Chain the decode error so the parse position stays in the traceback.
            raise RuntimeError(msg) from je
        except Exception as e:
            glogger.exception("Error generating priority suggestions: %s", e)
            raise
292
+
293
+
app/services.py CHANGED
@@ -117,87 +117,106 @@ class PageSpeedService:
117
  def _create_analysis_prompt(self, pagespeed_data: Dict[Any, Any]) -> str:
118
  """
119
  Create the specialized prompt for Gemini analysis.
120
-
121
  Args:
122
  pagespeed_data (Dict[Any, Any]): PageSpeed Insights data
123
-
124
  Returns:
125
  str: Formatted prompt for Gemini
126
  """
127
- # We do not log full JSON here to avoid huge payload in logs,
128
- # but we do log that prompt construction is happening.
129
  logger.debug("Building Gemini analysis prompt from PageSpeed data.")
130
- return (
131
- "**Role:** You are an **Expert Web Performance Optimization Analyst and Senior Full-Stack Engineer** "
132
- "with deep expertise in interpreting Google PageSpeed Insights data, diagnosing frontend and "
133
- "backend bottlenecks, and devising actionable, high-impact optimization strategies.\n\n"
134
- "**Objective:**\n"
135
- "Analyze the provided Google PageSpeed Insights JSON data for the analyzed website. "
136
- "Your primary goal is to generate a comprehensive, prioritized, and actionable set of strategies "
137
- "to significantly improve its performance. These strategies must directly address the specific "
138
- "metrics and audit findings within the report, aiming to elevate both Core Web Vitals "
139
- "(LCP, INP, CLS) and other key performance indicators (FCP, TTFB, TBT), and ultimately "
140
- "improve the `overall_category` to 'FAST' where possible.\n\n"
141
- "**Input Data:**\n"
142
- "The following JSON object contains the complete PageSpeed Insights report:\n"
143
- f"```json\n{json.dumps(pagespeed_data, indent=2)}\n```\n\n"
144
- "**Analysis and Strategy Formulation - Instructions:**\n\n"
145
- "1. **Executive Performance Summary:**\n"
146
- " * Begin with a concise overview of the website's current performance status based on the provided data.\n"
147
- " * Highlight the `overall_category` for both `loadingExperience` (specific URL) and `originLoadingExperience` (entire origin).\n"
148
- " * Pinpoint the current values and `category` (e.g., FAST, AVERAGE, SLOW) for each key metric:\n"
149
- " * `CUMULATIVE_LAYOUT_SHIFT_SCORE` (CLS)\n"
150
- " * `EXPERIMENTAL_TIME_TO_FIRST_BYTE` (TTFB)\n"
151
- " * `FIRST_CONTENTFUL_PAINT_MS` (FCP)\n"
152
- " * `INTERACTION_TO_NEXT_PAINT` (INP)\n"
153
- " * `LARGEST_CONTENTFUL_PAINT_MS` (LCP)\n"
154
- " * `total-blocking-time` (TBT) from Lighthouse.\n"
155
- " * Identify any significant `metricSavings` opportunities highlighted in the Lighthouse `audits`.\n\n"
156
- "2. **Deep-Dive into Bottlenecks & Audit Failures:**\n"
157
- " * Systematically go through the `loadingExperience`, `originLoadingExperience`, and `lighthouseResult` (especially the `audits` section).\n"
158
- " * For each underperforming metric or failed/suboptimal audit (e.g., Lighthouse scores less than 1, or `notApplicable` audits with clear improvement paths like `lcp-lazy-loaded`, `critical-request-chains`, `dom-size`, `non-composited-animations`), extract the relevant details, display values, and numeric values.\n\n"
159
- "3. **Develop Prioritized, Actionable Optimization Strategies:**\n"
160
- " For *each* identified performance issue or opportunity, provide the following:\n"
161
- " * **A. Issue & Evidence:** Clearly state the problem (e.g., \"High Total Blocking Time,\" \"Suboptimal Largest Contentful Paint due to unoptimized image,\" \"Excessive DOM Size,\" \"Render-blocking resources in critical request chain\"). Refer directly to the JSON data points and audit IDs that support this finding (e.g., `audits['total-blocking-time'].numericValue`, `audits['critical-request-chains'].details.longestChain`).\n"
162
- " * **B. Root Cause Analysis (Inferred):** Briefly explain the likely technical reasons behind the issue based on the data.\n"
163
- " * **C. Specific, Technical Recommendation(s):** Provide detailed, actionable steps a development team can take. Be specific.\n"
164
- " * **D. Targeted Metric Improvement:** Specify which primary and secondary metrics this strategy will positively impact (e.g., \"This will directly reduce LCP and improve FCP,\" or \"This will significantly lower TBT and improve INP.\").\n"
165
- " * **E. Priority Level:** Assign a priority (High, Medium, Low) based on:\n"
166
- " * Impact on Core Web Vitals.\n"
167
- " * Potential for overall score improvement (consider `metricSavings`).\n"
168
- " * Severity of the issue (e.g., 'SLOW' or 'AVERAGE' categories).\n"
169
- " * Estimated implementation effort (favor high-impact, low/medium-effort tasks for higher priority).\n"
170
- " * **F. Justification for Priority:** Briefly explain why this priority was assigned.\n\n"
171
- "4. **Strategic Grouping (Optional but Recommended):**\n"
172
- " If applicable, group recommendations by area (e.g., Asset Optimization, JavaScript Optimization, Server-Side Improvements, Rendering Path Optimization, CSS Enhancements).\n\n"
173
- "5. **Anticipated Overall Impact:**\n"
174
- " Conclude with a statement on the anticipated overall improvement in performance and user experience if the high and medium-priority recommendations are implemented.\n\n"
175
- "**Output Format:**\n"
176
- "Please structure your response clearly. Use headings, subheadings, and bullet points to enhance readability and actionability. For example:\n\n"
177
- "---\n"
178
- "## Executive Performance Summary\n"
179
- "* **Overall URL Loading Experience Category:** [e.g., AVERAGE]\n"
180
- "* **Overall Origin Loading Experience Category:** [e.g., AVERAGE]\n"
181
- "* **Key Metrics:**\n"
182
- " * LCP: [Value] ms ([Category])\n"
183
- " * INP: [Value] ms ([Category])\n"
184
- " * ...etc.\n\n"
185
- "---\n"
186
- "## Prioritized Optimization Strategies\n\n"
187
- "### High Priority\n"
188
- "**1. Issue & Evidence:** [e.g., High Total Blocking Time (TBT) of 1200 ms - `audits['total-blocking-time'].numericValue`]\n"
189
- " * **Root Cause Analysis:** [e.g., Long JavaScript tasks on the main thread during page load, likely from unoptimized third-party scripts or complex component rendering.]\n"
190
- " * **Specific, Technical Recommendation(s):**\n"
191
- " * [Action 1]\n"
192
- " * [Action 2]\n"
193
- " * **Targeted Metric Improvement:** [e.g., TBT, INP, FCP]\n"
194
- " * **Justification for Priority:** [e.g., Directly impacts interactivity (INP) and is a significant contributor to a poor lab score.]\n\n"
195
- "**(Continue with other High, Medium, and Low priority items)**\n"
196
- "---\n\n"
197
- "**Ensure your analysis is based *solely* on the provided JSON data and your expert interpretation of it. "
198
- "Avoid generic advice; all recommendations must be tied to specific findings within the report. "
199
- "Do not add anything irrelevant in the report. Do not write text in the starting of the report**"
200
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  def analyze_url(self, url: str) -> Dict[str, Any]:
203
  """
@@ -257,19 +276,45 @@ class PageSpeedService:
257
  try:
258
  model = genai.GenerativeModel("gemini-2.0-flash")
259
 
260
- prompt = (
261
- "You are an expert web performance analyst.\n"
262
- "Extract and organize the optimization recommendations from the following performance report\n"
263
- "into a JSON object with exactly these keys: \"high\", \"medium\", \"low\", and \"unknown\".\n"
264
- "Each key’s value should be a list of suggestion strings.\n\n"
265
- "Important:\n"
266
- "- Respond with *only* a valid JSON object.\n"
267
- "- Do NOT include any commentary or explanation outside the JSON.\n\n"
268
- "Performance Report:\n"
269
- "```\n"
270
- + report +
271
- "\n```"
272
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
  response = model.generate_content(prompt)
275
  raw = (response.text or "").strip()
 
117
  def _create_analysis_prompt(self, pagespeed_data: Dict[Any, Any]) -> str:
118
  """
119
  Create the specialized prompt for Gemini analysis.
120
+
121
  Args:
122
  pagespeed_data (Dict[Any, Any]): PageSpeed Insights data
123
+
124
  Returns:
125
  str: Formatted prompt for Gemini
126
  """
 
 
127
  logger.debug("Building Gemini analysis prompt from PageSpeed data.")
128
+ return f"""
129
+ You are an **Expert Web Performance Optimization Consultant**. The following JSON `{{PSI_DATA}}` contains exactly these keys (all required):
130
+
131
+ ```
132
+ {{
133
+ "url": string, // analyzed page URL
134
+ "origin": string, // origin domain
135
+ "loading_experience": {{ // Chrome UX data for URL
136
+ "overall_category": "FAST"|"AVERAGE"|"SLOW",
137
+ "metrics": {{
138
+ "CLS": {{ "percentile": number, "category": string }},
139
+ "TTFB": {{ "percentile": number, "category": string }},
140
+ "FCP": {{ "percentile": number, "category": string }},
141
+ "INP": {{ "percentile": number, "category": string }}
142
+ }}
143
+ }},
144
+ "origin_loading_experience": {{ // Chrome UX data for origin
145
+ "overall_category": "FAST"|"AVERAGE"|"SLOW"
146
+ }},
147
+ "lighthouse_audits": [ // only audits with score <1 or notApplicable
148
+ {{
149
+ "id": string, // audit identifier
150
+ "numeric_value": number, // ms or unit value
151
+ "score": number|null, // 0–1 or null if N/A
152
+ "description": string, // audit title/description
153
+ "details": {{ // optional details for resource URLs
154
+ "items": [ {{ "url": string }} ]
155
+ }},
156
+ "metric_savings_ms"?: number // if available
157
+ }}
158
+ ]
159
+ }}
160
+ ```
161
+
162
+ Your job: output **exactly** the following JSON report—no extra keys, no prose outside these structures:
163
+
164
+ ```json
165
+ {{
166
+ "overall_score": integer,
167
+ "grade": "A"|"B"|"C"|"D"|"F",
168
+ "summary": {{
169
+ "CLS": {{ "value": number, "category": string }},
170
+ "TTFB": {{ "value": number, "category": string }},
171
+ "FCP": {{ "value": number, "category": string }},
172
+ "INP": {{ "value": number, "category": string }},
173
+ "LCP": {{ "value": number, "score": number }},
174
+ "TBT": {{ "value": number, "score": number }}
175
+ }},
176
+ "top_issues": [string],
177
+ "top_opportunities": [string],
178
+ "audits": [
179
+ {{
180
+ "id": string,
181
+ "value": number,
182
+ "score": number|null,
183
+ "resource_url"?: string, // first offending URL from details.items
184
+ "status": "critical"|"needs_improvement"|"good",
185
+ "recommendation": string,
186
+ "expected_gain_s": number
187
+ }}
188
+ ],
189
+ "action_plan": [
190
+ {{
191
+ "id": string,
192
+ "fix": string,
193
+ "platform_tip"?: string, // e.g. Next.js `next/image` or WordPress-specific advice
194
+ "effort": "low"|"medium"|"high"
195
+ }}
196
+ ],
197
+ "monitoring": {{
198
+ "frequency": string,
199
+ "methods": [string],
200
+ "ci_snippet"?: string // optional GitHub Action or Lighthouse CI config
201
+ }}
202
+ }}```
203
+ **Requirements:**
204
+ - **Strict Mapping:** Every field derives from `{{PSI_DATA}}` (use JSON paths that match the schema above, e.g. `lighthouse_audits[i].numeric_value`).
205
+ - **No Extra Text:** Only the JSON above.
206
+ - **Tie to JSON Paths:** Include resource URLs via `details.items[0].url`.
207
+ - **Exact Code Snippets:** Provide `<link rel="preload"...>` or `<script defer>` snippets.
208
+ - **Quantify Impact:** Use `metric_savings_ms` for each audit to calculate `expected_gain_s`.
209
+ - **Threshold Targets:** State target values, e.g. "Reduce LCP to ≤1200 ms".
210
+ - **Platform‑Specific Tips:** If known, include stack advice, e.g. Next.js `next/image` or WordPress plugins.
211
+ - **Monitoring CI:** Optionally include a GitHub Action snippet:
212
+ ```yaml
213
+ - uses: treosh/lighthouse-ci-action@v5
214
+ with:
215
+ configPath: .lighthouserc.json
216
+ ```
217
+ - **Deterministic Scoring & Priority:** Apply the same scoring and priority rules on every run, so that identical input always yields identical output.
218
+ """
219
+
220
 
221
  def analyze_url(self, url: str) -> Dict[str, Any]:
222
  """
 
276
  try:
277
  model = genai.GenerativeModel("gemini-2.0-flash")
278
 
279
+ prompt = f"""
280
+ You are an **Expert Web Performance Analyst & Optimization Engineer**.
281
+
282
+ Your task is to carefully analyze the provided PageSpeed Insights performance report.
283
+ Extract **all** optimization recommendations and organize them into a JSON object with exactly these keys:
284
+ - "high"
285
+ - "medium"
286
+ - "low"
287
+ - "unknown"
288
+
289
+ Extract and organize the optimization recommendations from the following performance report
290
+ into a JSON object with exactly these keys: \"high\", \"medium\", \"low\", and \"unknown\".
291
+ Each key’s value should be a list of suggestion strings.
292
+
293
+ Classification Rules:
294
+ 1. **Audit Reference:** Cite the audit ID **and** full JSON path (e.g. `lighthouseResult.audits['unused-javascript'].details.items[0].url`).
295
+ 2. **Measurable Target:** Include the numeric goal (e.g., "Reduce LCP to ≤1200 ms").
296
+ 3. **Resource Context:** Embed the resource URL or file name when relevant.
297
+ 4. **Expected Savings:** Append expected savings in seconds (from `metric_savings_ms`).
298
+ 5. **Effort Estimate:** Add an effort estimate (e.g., "Effort: Medium (≈2 hrs)").
299
+ 6. **Code Snippet:** Provide a ready‑to‑copy snippet if applicable (e.g., `<img loading="lazy" src=...>`).
300
+ 7. **Category Tag:** Prefix with optimization domain `[Image]`, `[CSS]`, `[JS]`, `[Server]`.
301
+ 8. **Impact Score:** Append a simple impact rating (e.g., "Impact: ⭐⭐⭐☆☆" or "% of total savings").
302
+ 9. **Platform Tip:** If known, include stack‑specific advice (e.g., Next.js `next/image`).
303
+ 10. **Priority Classification:**
304
+ - High: Savings ≥ 1.5 seconds or score < 0.25
305
+ - Medium: Savings ≥ 0.5 and < 1.5 seconds, or score from 0.25 to 0.50
306
+ - Low: Savings < 0.5 seconds or score above 0.50 (up to 1.0)
307
+ - Unknown: No savings or score data available
308
+
309
+ Important:
310
+ - Respond with *only* a valid JSON object.
311
+ - Do NOT include any commentary or explanation outside the JSON.
312
+
313
+ Performance Report:
314
+ {report}
315
+ """
316
+
317
+
318
 
319
  response = model.generate_content(prompt)
320
  raw = (response.text or "").strip()