Spaces:

Hammad712
/

MAAS

Sleeping

App Files Community

Hammad712 committed on Jul 6, 2025

Commit

54c31ea

1 Parent(s): b1f7307

Added SEO report generation and updated prompts

Browse files

Files changed (9) hide show

.gitignore +1 -0
app/main.py +5 -0
app/rag/embeddings.py +0 -29
app/rag/prompt_library.py +75 -0
app/rag/routes.py +2 -2
app/rag/utils.py +14 -3
app/seo/routes.py +38 -0
app/seo/seo_service.py +293 -0
app/services.py +133 -88

.gitignore CHANGED Viewed

@@ -4,6 +4,7 @@ __pycache__/
 *$py.class
 # Virtual environment
 venv/
 env/
 .myenv/

 *$py.class
 # Virtual environment
+.venv/
 venv/
 env/
 .myenv/

app/main.py CHANGED Viewed

@@ -22,6 +22,8 @@ from app.models import (
 )
 from app.services import PageSpeedService
 from app.rag.routes import router as rag_router
 # ------------------------
 # Configure root logger
@@ -63,6 +65,9 @@ app = FastAPI(
 # Mount RAG router
 app.include_router(rag_router)
 # Add CORS middleware
 app.add_middleware(
     CORSMiddleware,

 )
 from app.services import PageSpeedService
 from app.rag.routes import router as rag_router
+from app.seo import routes as seo_routes
 # ------------------------
 # Configure root logger
 # Mount RAG router
 app.include_router(rag_router)
+app.include_router(seo_routes.router)
 # Add CORS middleware
 app.add_middleware(
     CORSMiddleware,

app/rag/embeddings.py CHANGED Viewed

@@ -44,32 +44,3 @@ encode_kwargs = {"normalize_embeddings": True}
 embeddings = HuggingFaceBgeEmbeddings(
     model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
 )
-# ──────────────────────────────────────────────────────────────────────────────
-# 3. Prompt Template for RAG Assistant
-# ──────────────────────────────────────────────────────────────────────────────
-prompt_template = """
-You are an assistant specialized in analyzing and improving website performance. Your goal is to provide accurate, practical, and performance-driven answers.
-Use the following retrieved context (such as PageSpeed Insights data or audit results) to answer the user's question.
-If the context lacks sufficient information, respond with "I don't know." Do not make up answers or provide unverified information.
-Guidelines:
-1. Extract relevant performance insights from the context to form a helpful and actionable response.
-2. Maintain a clear, professional, and user-focused tone.
-3. If the question is unclear or needs more detail, ask for clarification politely.
-4. Prioritize recommendations that follow web performance best practices (e.g., optimizing load times, reducing blocking resources, improving visual stability).
-Retrieved context:
-{context}
-User's question:
-{question}
-Your response:
-"""
-user_prompt = ChatPromptTemplate.from_messages(
-    [
-        ("system", prompt_template),
-        ("human", "{question}"),
-    ]
-)

 embeddings = HuggingFaceBgeEmbeddings(
     model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
 )

app/rag/prompt_library.py ADDED Viewed

	@@ -0,0 +1,75 @@

+from langchain.prompts import ChatPromptTemplate
+# ──────────────────────────────────────────────────────────────────────────────
+# 1. Prompt Template for PAGE Speed Insights RAG Chatbot
+# ──────────────────────────────────────────────────────────────────────────────
+prompt_template = """
+You are an assistant specialized in analyzing and improving website performance. Your goal is to provide accurate, practical, and performance-driven answers.
+Use the following retrieved context (such as PageSpeed Insights data or audit results) to answer the user's question.
+If the context lacks sufficient information, respond with "I don't know." Do not make up answers or provide unverified information.
+Guidelines:
+1. Extract relevant performance insights from the context to form a helpful and actionable response.
+2. Maintain a clear, professional, and user-focused tone.
+3. If the question is unclear or needs more detail, ask for clarification politely.
+4. Prioritize recommendations that follow web performance best practices (e.g., optimizing load times, reducing blocking resources, improving visual stability).
+Retrieved context:
+{context}
+User's question:
+{question}
+Your response:
+"""
+page_speed_prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", prompt_template),
+        ("human", "{question}"),
+    ]
+)
+# ──────────────────────────────────────────────────────────────────────────────
+# 2. Prompt Template for Default RAG Chatbot
+# ──────────────────────────────────────────────────────────────────────────────
+default_user_prompt_template = """You are an assistant specialized in answering user questions based on the provided context.
+Use the following retrieved context to answer the user's question.
+If the context lacks sufficient information, respond with "I don't know."
+Do not make up answers or provide unverified information.
+Retrieved context:
+{context}
+User's question:
+{question}
+Your response:
+"""
+default_user_prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", default_user_prompt_template),
+        ("human", "{question}"),
+    ]
+)
+# ──────────────────────────────────────────────────────────────────────────────
+# 3. Prompt Template for SEO RAG Chatbot
+# ──────────────────────────────────────────────────────────────────────────────
+seo_prompt_template = """You are an SEO assistant specialized in analyzing and improving website search engine optimization.
+Use the following retrieved context to answer the user's question.
+If the context lacks sufficient information, respond with "I don't know."
+Do not make up answers or provide unverified information.
+Retrieved context:
+{context}
+User's question:
+{question}
+Your response:
+"""
+seo_prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", seo_prompt_template),
+        ("human", "{question}"),
+    ]
+)

app/rag/routes.py CHANGED Viewed

@@ -97,7 +97,7 @@ async def create_chat_session(user_id: str):
 @router.post("/chat/{user_id}/{chat_id}", response_model=ChatResponse)
-async def chat_with_user(user_id: str, chat_id: str, body: ChatRequest):
     question = body.question.strip()
     logger.info("Chat request user=%s chat=%s question=%s", user_id, chat_id, question)
@@ -112,7 +112,7 @@ async def chat_with_user(user_id: str, chat_id: str, body: ChatRequest):
         ChatHistoryManager.add_message(chat_id, role="human", content=question)
         # 4) Build and invoke the RAG chain
-        chain = build_rag_chain(user_id, chat_id)
         history = ChatHistoryManager.get_messages(chat_id)
         result = chain.invoke({"question": question, "chat_history": history})
         answer = result.get("answer") or result.get("output_text")

 @router.post("/chat/{user_id}/{chat_id}", response_model=ChatResponse)
+async def chat_with_user(user_id: str, chat_id: str, prompt_type:str, body: ChatRequest):
     question = body.question.strip()
     logger.info("Chat request user=%s chat=%s question=%s", user_id, chat_id, question)
         ChatHistoryManager.add_message(chat_id, role="human", content=question)
         # 4) Build and invoke the RAG chain
+        chain = build_rag_chain(user_id, chat_id , prompt_type)
         history = ChatHistoryManager.get_messages(chat_id)
         result = chain.invoke({"question": question, "chat_history": history})
         answer = result.get("answer") or result.get("output_text")

app/rag/utils.py CHANGED Viewed

@@ -9,8 +9,9 @@ from langchain.chains import ConversationalRetrievalChain
 from app.config import settings
 from .db import vectorstore_meta_coll, chat_collection_name
-from .embeddings import embeddings, text_splitter, user_prompt, get_llm
 from .logging_config import logger
 # ──────────────────────────────────────────────────────────────────────────────
 # 1. Helper: Path to Store (or Load) a User's FAISS Vectorstore on Disk
@@ -96,7 +97,7 @@ def initialize_chat_history(chat_id: str) -> MongoDBChatMessageHistory:
 # ──────────────────────────────────────────────────────────────────────────────
 # 6. Build a ConversationalRetrievalChain (RAG Chain) for user_id + chat_id
 # ──────────────────────────────────────────────────────────────────────────────
-def build_rag_chain(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
     """
     - Loads the FAISS index for user_id.
     - Creates a retriever (k=3).
@@ -123,6 +124,16 @@ def build_rag_chain(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
     # 4. Get the LLM
     llm = get_llm()
     # 5. Build the ConversationalRetrievalChain with the wrapped memory
     chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
@@ -130,7 +141,7 @@ def build_rag_chain(user_id: str, chat_id: str) -> ConversationalRetrievalChain:
         memory=memory,                             # ← pass the ConversationBufferMemory here
         return_source_documents=False,
         chain_type="stuff",
-        combine_docs_chain_kwargs={"prompt": user_prompt},
         verbose=False,
     )
     return chain

 from app.config import settings
 from .db import vectorstore_meta_coll, chat_collection_name
+from .embeddings import embeddings, text_splitter, get_llm
 from .logging_config import logger
+from app.rag.prompt_library import page_speed_prompt, default_user_prompt,seo_prompt
 # ──────────────────────────────────────────────────────────────────────────────
 # 1. Helper: Path to Store (or Load) a User's FAISS Vectorstore on Disk
 # ──────────────────────────────────────────────────────────────────────────────
 # 6. Build a ConversationalRetrievalChain (RAG Chain) for user_id + chat_id
 # ──────────────────────────────────────────────────────────────────────────────
+def build_rag_chain(user_id: str, chat_id: str, prompt_type: str) -> ConversationalRetrievalChain:
     """
     - Loads the FAISS index for user_id.
     - Creates a retriever (k=3).
     # 4. Get the LLM
     llm = get_llm()
+    if prompt_type == "page_speed":
+        # Use the specific prompt for Page Speed Insights
+        user_prompt = page_speed_prompt
+    elif prompt_type == "seo":
+        # Use the specific prompt for SEO
+        user_prompt = seo_prompt
+    else:
+        # Default to the user prompt if no specific type is provided
+        user_prompt = default_user_prompt
     # 5. Build the ConversationalRetrievalChain with the wrapped memory
     chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
         memory=memory,                             # ← pass the ConversationBufferMemory here
         return_source_documents=False,
         chain_type="stuff",
+        combine_docs_chain_kwargs={"prompt": user_prompt},  # Use the user prompt for combining docs
         verbose=False,
     )
     return chain

app/seo/routes.py ADDED Viewed

	@@ -0,0 +1,38 @@

+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+from typing import Any, Dict
+from .seo_service import SEOService
+router = APIRouter(prefix="/seo", tags=["SEO"])
+seo_service = SEOService()
+class SEORequest(BaseModel):
+    seo_data: Dict[str, Any]
+class SEOPriorityRequest(BaseModel):
+    report: str
+@router.post("/generate-report")
+def generate_seo_report(request: SEORequest):
+    """
+    Generate SEO report via Gemini.
+    """
+    try:
+        report = seo_service.generate_seo_report(request.seo_data)
+        return {"success": True, "report": report}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.post("/generate-priority")
+def generate_seo_priority(request: SEOPriorityRequest):
+    """
+    Generate prioritized SEO suggestions from the report.
+    """
+    try:
+        priority_suggestions = seo_service.generate_seo_priority(request.report)
+        return {"success": True, "priority_suggestions": priority_suggestions}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))

app/seo/seo_service.py ADDED Viewed

	@@ -0,0 +1,293 @@

+"""
+Business logic services for PageSpeed and SEO analysis.
+"""
+import json
+import requests
+import logging
+import google.generativeai as genai
+from typing import Dict, Any
+from app.config import settings
+# Create a module-level logger
+glogger = logging.getLogger(__name__)
+class SEOService:
+    """
+    Service class for generating SEO reports via Gemini.
+    """
+    def __init__(self):
+        self.gemini_api_key = settings.gemini_api_key
+        if self.gemini_api_key:
+            glogger.info("Configuring Gemini AI for SEO reporting.")
+            genai.configure(api_key=self.gemini_api_key)
+        else:
+            glogger.warning("No Gemini API key found. SEO reporting will fail if called.")
+    def generate_seo_report(self, seo_data: Dict[str, Any]) -> str:
+        """
+        Generate an SEO audit report using Gemini AI.
+        Args:
+            seo_data (Dict[str, Any]): Collected SEO metrics in JSON format.
+        Returns:
+            str: JSON-formatted SEO report string
+        Raises:
+            Exception: If report generation fails
+        """
+        glogger.info("Starting SEO report generation.")
+        if not self.gemini_api_key:
+            msg = "Gemini API key not configured"
+            glogger.error(msg)
+            raise Exception(msg)
+        prompt = self._create_seo_prompt(seo_data)
+        glogger.debug("SEO prompt: %s...", prompt[:200])
+        try:
+            model = genai.GenerativeModel("gemini-2.0-flash")
+            response = model.generate_content(prompt)
+            text = getattr(response, "text", None)
+            if not text:
+                raise Exception("Empty response from Gemini")
+            glogger.info("SEO report generated successfully.")
+            return text.strip()
+        except Exception as e:
+            msg = f"Error generating SEO report: {e}"
+            glogger.error(msg, exc_info=True)
+            raise
+    def _create_seo_prompt(self, seo_data: Dict[str, Any]) -> str:
+        """
+        Build the advanced prompt for SEO analysis based on the updated specialized template.
+        """
+        return f"""
+You are an **Expert SEO Consultant** with deep expertise in on‑page, technical, and off‑page SEO.
+The following JSON `{{SEO_DATA}}` contains exactly these keys (all required):
+{json.dumps(seo_data, indent=2)}
+Your task is to output **exactly** the following JSON report—no additional text, no extra keys, no commentary:
+```json
+{{
+  "overall_score": integer,
+  "grade": "A"|"B"|"C"|"D"|"F",
+  "top_strengths": [string],
+  "top_issues": [string],
+  "metrics": [
+    {{
+      "name": string,
+      "value": string|number|boolean|array,
+      "benchmark": string,
+      "score": integer,
+      "status": "good"|"needs_improvement"|"critical",
+      "why_it_matters": string,
+      "recommendation": string
+    }}
+  ],
+  "action_plan": [
+    {{
+      "metric": string,
+      "fix": string,
+      "effort_level": "low"|"medium"|"high"
+    }}
+  ],
+  "monitoring": {{
+    "frequency": string,
+    "methods": [string]
+  }},
+  "technical_seo": "data_unavailable" | {{
+    "core_web_vitals": {{
+      "LCP": string,
+      "FID": string,
+      "CLS": string
+    }},
+    "page_speed_score": integer,
+    "lazy_loading": boolean,
+    "security_headers": [string]
+  }},
+  "schema_markup": "data_unavailable" | {{
+    "structured_data_types": [string],
+    "valid": boolean
+  }},
+  "backlink_profile": "data_unavailable" | {{
+    "referring_domains": integer,
+    "toxic_links": integer,
+    "recommendations": string
+  }},
+  "trend_comparison": "data_unavailable" | {{
+    "previous_score": integer,
+    "change": "increase"|"decrease"|"no_change",
+    "comment": string
+  }}
+}}
+Instructions:
+Do not include any text before or after the JSON.
+Evaluate SEO performance holistically across all provided data:
+On‑Page SEO (titles, meta, headings, content, images, links)
+Technical SEO (robots.txt, sitemap.xml, indexability, mobile‑friendly, HTTPS, URL structure)
+Off‑Page SEO (backlink_profile)
+Use deterministic scoring based on internal benchmarks:
+SEO Score: ≤50=critical, 51–70=needs_improvement, >70=good
+Meta Title length: 50–60 chars=good, <50 or >60=needs_improvement
+H1 Tags: exactly 1=good, >1=needs_improvement, 0=critical
+Heading Structure errors: any=critical
+Image Alt Tags ratio: ≥90% good, 50–89% needs_improvement, <50% critical
+sitemapXmlCheck: missing=critical
+robotsTxtCheck: missing=critical
+indexabilityCheck: false=critical
+internalLinksCount: <5=needs_improvement
+externalLinksCount: <2=needs_improvement
+Advanced sections (technical_seo, schema_markup, backlink_profile, trend_comparison):
+If the input data lacks these metrics, set the field value to "data_unavailable".
+Otherwise, populate with real values (e.g., core web vitals, page speed score, backlink counts).
+The action_plan must list the 5 weakest metrics by score, across all sections.
+Set "monitoring.frequency" to:
+"weekly" if any metric status is "critical" or "needs_improvement".
+"monthly" if all metrics are "good".
+Grading scale:
+90–100: A
+80–89: B
+70–79: C
+60–69: D
+<60: F
+"""
+    def generate_seo_priority(self, report: str) -> Dict[str, Any]:
+        """
+        Generate a dictionary of prioritized performance recommendations based on the Gemini-generated report.
+        Args:
+            report (str): The Gemini-generated performance report
+        Returns:
+            Dict[str, Any]: Dictionary mapping priority levels to optimization suggestions
+        Raises:
+            Exception: If the priority generation fails
+        """
+        glogger.info("Generating prioritized suggestions from the Gemini report.")
+        if not self.gemini_api_key:
+            msg = "Gemini API key not configured"
+            glogger.error(msg)
+            raise Exception(msg)
+        try:
+            model = genai.GenerativeModel("gemini-2.0-flash")
+            prompt = f"""
+You are an **Expert Web Performance Analyst & Optimization Engineer**.
+Your task is to carefully analyze the provided PageSpeed Insights performance report.
+Extract **all** optimization recommendations and organize them into a JSON object with exactly these keys:
+  - "high"
+  - "medium"
+  - "low"
+  - "unknown"
+Extract and organize the optimization recommendations from the following performance report
+into a JSON object with exactly these keys: \"high\", \"medium\", \"low\", and \"unknown\".
+Each key’s value should be a list of suggestion strings.
+Classification Rules:
+1. **Metric Reference:** For each suggestion, cite the metric name and full JSON path
+   (e.g. `metrics[2].name == "Keyword Density"` or `metrics[6].value`).
+2. **Benchmark Comparison:** Include both the **current value** and the **ideal benchmark**
+   (e.g. `"Current: 15 keywords, Ideal: 1–3% density"`).
+3. **Impact Estimate:** Quantify expected SEO impact (e.g. `"+12% CTR"` or `"+0.5 page rank score"`).
+4. **Effort Estimate:** Add an effort estimate (e.g. `"Effort: Low (≈1 hr)"`).
+5. **Code Snippet:** Provide a ready‑to‑copy example if applicable
+   (e.g. `<meta name="description" content="...">`).
+6. **Category Tag:** Prefix with SEO domain—
+   `[On-Page]`, `[Technical]`, `[Off-Page]`, `[Local]`, `[Schema]`.
+7. **Impact Score:** Append a simple impact rating (e.g. `"Impact: ⭐⭐⭐☆☆"`).
+8. **Platform Tip:** If applicable, include CMS or framework advice
+   (e.g. `"WordPress: use Yoast SEO"`, `"Next.js: use next/head"`).
+9. **Priority Classification:**
+- **High:** Any metric with score `"critical"` or < 60, or impact ≥ 10%.
+- **Medium:** Score 60–79 or impact 5–9%.
+- **Low:** Score 80–100 or impact < 5%.
+- **Unknown:** No score or impact data available.
+Important:
+- Respond with *only* a valid JSON object.
+- Do NOT include any commentary or explanation outside the JSON.
+Performance Report:
+{report}
+"""
+            response = model.generate_content(prompt)
+            raw = (response.text or "").strip()
+            glogger.debug("Raw priority response: %s", raw[:500] + ("…" if len(raw) > 500 else ""))
+            # Locate the JSON portion by finding the first '{' and the last '}'
+            start = raw.find('{')
+            end = raw.rfind('}')
+            if start == -1 or end == -1 or end <= start:
+                raise ValueError("No JSON object found in Gemini response")
+            json_str = raw[start:end+1]
+            glogger.debug("Extracted JSON string: %s", json_str)
+            suggestions = json.loads(json_str)
+            if not isinstance(suggestions, dict):
+                raise ValueError("Parsed JSON is not a dictionary")
+            # Ensure all expected keys exist
+            for key in ("high", "medium", "low", "unknown"):
+                suggestions.setdefault(key, [])
+            glogger.info("Priority suggestions generated successfully.")
+            return suggestions
+        except json.JSONDecodeError as je:
+            msg = f"Failed to parse JSON from Gemini response: {je}"
+            glogger.error(msg, exc_info=True)
+            raise Exception(msg)
+        except Exception as e:
+            msg = f"Error generating priority suggestions: {e}"
+            glogger.error(msg, exc_info=True)
+            raise

app/services.py CHANGED Viewed

@@ -117,87 +117,106 @@ class PageSpeedService:
     def _create_analysis_prompt(self, pagespeed_data: Dict[Any, Any]) -> str:
         """
         Create the specialized prompt for Gemini analysis.
         Args:
             pagespeed_data (Dict[Any, Any]): PageSpeed Insights data
         Returns:
             str: Formatted prompt for Gemini
         """
-        # We do not log full JSON here to avoid huge payload in logs,
-        # but we do log that prompt construction is happening.
         logger.debug("Building Gemini analysis prompt from PageSpeed data.")
-        return (
-            "**Role:** You are an **Expert Web Performance Optimization Analyst and Senior Full-Stack Engineer** "
-            "with deep expertise in interpreting Google PageSpeed Insights data, diagnosing frontend and "
-            "backend bottlenecks, and devising actionable, high-impact optimization strategies.\n\n"
-            "**Objective:**\n"
-            "Analyze the provided Google PageSpeed Insights JSON data for the analyzed website. "
-            "Your primary goal is to generate a comprehensive, prioritized, and actionable set of strategies "
-            "to significantly improve its performance. These strategies must directly address the specific "
-            "metrics and audit findings within the report, aiming to elevate both Core Web Vitals "
-            "(LCP, INP, CLS) and other key performance indicators (FCP, TTFB, TBT), and ultimately "
-            "improve the `overall_category` to 'FAST' where possible.\n\n"
-            "**Input Data:**\n"
-            "The following JSON object contains the complete PageSpeed Insights report:\n"
-            f"```json\n{json.dumps(pagespeed_data, indent=2)}\n```\n\n"
-            "**Analysis and Strategy Formulation - Instructions:**\n\n"
-            "1.  **Executive Performance Summary:**\n"
-            "    * Begin with a concise overview of the website's current performance status based on the provided data.\n"
-            "    * Highlight the `overall_category` for both `loadingExperience` (specific URL) and `originLoadingExperience` (entire origin).\n"
-            "    * Pinpoint the current values and `category` (e.g., FAST, AVERAGE, SLOW) for each key metric:\n"
-            "        * `CUMULATIVE_LAYOUT_SHIFT_SCORE` (CLS)\n"
-            "        * `EXPERIMENTAL_TIME_TO_FIRST_BYTE` (TTFB)\n"
-            "        * `FIRST_CONTENTFUL_PAINT_MS` (FCP)\n"
-            "        * `INTERACTION_TO_NEXT_PAINT` (INP)\n"
-            "        * `LARGEST_CONTENTFUL_PAINT_MS` (LCP)\n"
-            "        * `total-blocking-time` (TBT) from Lighthouse.\n"
-            "    * Identify any significant `metricSavings` opportunities highlighted in the Lighthouse `audits`.\n\n"
-            "2.  **Deep-Dive into Bottlenecks & Audit Failures:**\n"
-            "    * Systematically go through the `loadingExperience`, `originLoadingExperience`, and `lighthouseResult` (especially the `audits` section).\n"
-            "    * For each underperforming metric or failed/suboptimal audit (e.g., Lighthouse scores less than 1, or `notApplicable` audits with clear improvement paths like `lcp-lazy-loaded`, `critical-request-chains`, `dom-size`, `non-composited-animations`), extract the relevant details, display values, and numeric values.\n\n"
-            "3.  **Develop Prioritized, Actionable Optimization Strategies:**\n"
-            "    For *each* identified performance issue or opportunity, provide the following:\n"
-            "    * **A. Issue & Evidence:** Clearly state the problem (e.g., \"High Total Blocking Time,\" \"Suboptimal Largest Contentful Paint due to unoptimized image,\" \"Excessive DOM Size,\" \"Render-blocking resources in critical request chain\"). Refer directly to the JSON data points and audit IDs that support this finding (e.g., `audits['total-blocking-time'].numericValue`, `audits['critical-request-chains'].details.longestChain`).\n"
-            "    * **B. Root Cause Analysis (Inferred):** Briefly explain the likely technical reasons behind the issue based on the data.\n"
-            "    * **C. Specific, Technical Recommendation(s):** Provide detailed, actionable steps a development team can take. Be specific.\n"
-            "    * **D. Targeted Metric Improvement:** Specify which primary and secondary metrics this strategy will positively impact (e.g., \"This will directly reduce LCP and improve FCP,\" or \"This will significantly lower TBT and improve INP.\").\n"
-            "    * **E. Priority Level:** Assign a priority (High, Medium, Low) based on:\n"
-            "        * Impact on Core Web Vitals.\n"
-            "        * Potential for overall score improvement (consider `metricSavings`).\n"
-            "        * Severity of the issue (e.g., 'SLOW' or 'AVERAGE' categories).\n"
-            "        * Estimated implementation effort (favor high-impact, low/medium-effort tasks for higher priority).\n"
-            "    * **F. Justification for Priority:** Briefly explain why this priority was assigned.\n\n"
-            "4.  **Strategic Grouping (Optional but Recommended):**\n"
-            "    If applicable, group recommendations by area (e.g., Asset Optimization, JavaScript Optimization, Server-Side Improvements, Rendering Path Optimization, CSS Enhancements).\n\n"
-            "5.  **Anticipated Overall Impact:**\n"
-            "    Conclude with a statement on the anticipated overall improvement in performance and user experience if the high and medium-priority recommendations are implemented.\n\n"
-            "**Output Format:**\n"
-            "Please structure your response clearly. Use headings, subheadings, and bullet points to enhance readability and actionability. For example:\n\n"
-            "---\n"
-            "## Executive Performance Summary\n"
-            "* **Overall URL Loading Experience Category:** [e.g., AVERAGE]\n"
-            "* **Overall Origin Loading Experience Category:** [e.g., AVERAGE]\n"
-            "* **Key Metrics:**\n"
-            "    * LCP: [Value] ms ([Category])\n"
-            "    * INP: [Value] ms ([Category])\n"
-            "    * ...etc.\n\n"
-            "---\n"
-            "## Prioritized Optimization Strategies\n\n"
-            "### High Priority\n"
-            "**1. Issue & Evidence:** [e.g., High Total Blocking Time (TBT) of 1200 ms - `audits['total-blocking-time'].numericValue`]\n"
-            "    * **Root Cause Analysis:** [e.g., Long JavaScript tasks on the main thread during page load, likely from unoptimized third-party scripts or complex component rendering.]\n"
-            "    * **Specific, Technical Recommendation(s):**\n"
-            "        * [Action 1]\n"
-            "        * [Action 2]\n"
-            "    * **Targeted Metric Improvement:** [e.g., TBT, INP, FCP]\n"
-            "    * **Justification for Priority:** [e.g., Directly impacts interactivity (INP) and is a significant contributor to a poor lab score.]\n\n"
-            "**(Continue with other High, Medium, and Low priority items)**\n"
-            "---\n\n"
-            "**Ensure your analysis is based *solely* on the provided JSON data and your expert interpretation of it. "
-            "Avoid generic advice; all recommendations must be tied to specific findings within the report. "
-            "Do not add anything irrelevant in the report. Do not write text in the starting of the report**"
-        )
     def analyze_url(self, url: str) -> Dict[str, Any]:
         """
@@ -257,19 +276,45 @@ class PageSpeedService:
         try:
             model = genai.GenerativeModel("gemini-2.0-flash")
-            prompt = (
-                "You are an expert web performance analyst.\n"
-                "Extract and organize the optimization recommendations from the following performance report\n"
-                "into a JSON object with exactly these keys: \"high\", \"medium\", \"low\", and \"unknown\".\n"
-                "Each key’s value should be a list of suggestion strings.\n\n"
-                "Important:\n"
-                "- Respond with *only* a valid JSON object.\n"
-                "- Do NOT include any commentary or explanation outside the JSON.\n\n"
-                "Performance Report:\n"
-                "```\n"
-                + report +
-                "\n```"
-            )
             response = model.generate_content(prompt)
             raw = (response.text or "").strip()

     def _create_analysis_prompt(self, pagespeed_data: Dict[Any, Any]) -> str:
         """
         Create the specialized prompt for Gemini analysis.
         Args:
             pagespeed_data (Dict[Any, Any]): PageSpeed Insights data
         Returns:
             str: Formatted prompt for Gemini
         """
         logger.debug("Building Gemini analysis prompt from PageSpeed data.")
+        return f"""
+    You are an **Expert Web Performance Optimization Consultant**. The following JSON `{{PSI_DATA}}` contains exactly these keys (all required):
+    ```
+    {{
+    "url": string,  // analyzed page URL
+    "origin": string,  // origin domain
+    "loading_experience": {{  // Chrome UX data for URL
+        "overall_category": "FAST"|"AVERAGE"|"SLOW",
+        "metrics": {{
+        "CLS": {{ "percentile": number, "category": string }},
+        "TTFB": {{ "percentile": number, "category": string }},
+        "FCP": {{ "percentile": number, "category": string }},
+        "INP": {{ "percentile": number, "category": string }}
+        }}
+    }},
+    "origin_loading_experience": {{  // Chrome UX data for origin
+        "overall_category": "FAST"|"AVERAGE"|"SLOW"
+    }},
+    "lighthouse_audits": [  // only audits with score <1 or notApplicable
+        {{
+        "id": string,  // audit identifier
+        "numeric_value": number,  // ms or unit value
+        "score": number|null,  // 0–1 or null if N/A
+        "description": string,  // audit title/description
+        "details": {{  // optional details for resource URLs
+            "items": [ {{ "url": string }} ]
+        }},
+        "metric_savings_ms"?: number  // if available
+        }}
+    ]
+    }}
+    ```
+    Your job: output **exactly** the following JSON report—no extra keys, no prose outside these structures:
+    ```json
+    {{
+    "overall_score": integer,
+    "grade": "A"|"B"|"C"|"D"|"F",
+    "summary": {{
+        "CLS": {{ "value": number, "category": string }},
+        "TTFB": {{ "value": number, "category": string }},
+        "FCP": {{ "value": number, "category": string }},
+        "INP": {{ "value": number, "category": string }},
+        "LCP": {{ "value": number, "score": number }},
+        "TBT": {{ "value": number, "score": number }}
+    }},
+    "top_issues": [string],
+    "top_opportunities": [string],
+    "audits": [
+        {{
+        "id": string,
+        "value": number,
+        "score": number|null,
+        "resource_url"?: string,  // first offending URL from details.items
+        "status": "critical"|"needs_improvement"|"good",
+        "recommendation": string,
+        "expected_gain_s": number
+        }}
+    ],
+    "action_plan": [
+        {{
+        "id": string,
+        "fix": string,
+        "platform_tip"?: string,  // e.g. Next.js `next/image` or WordPress-specific advice
+        "effort": "low"|"medium"|"high"
+        }}
+    ],
+    "monitoring": {{
+        "frequency": string,
+        "methods": [string],
+        "ci_snippet"?: string  // optional GitHub Action or Lighthouse CI config
+    }}
+    }}
+    ```
+    **Requirements:**
+    - **Strict Mapping:** Every field derives from `{{PSI_DATA}}` (use JSON paths like `lighthouse_audits[...].numeric_value`).
+    - **No Extra Text:** Only the JSON above.
+    - **Tie to JSON Paths:** Include resource URLs via `details.items[0].url`.
+    - **Exact Code Snippets:** Provide `<link rel="preload"...>` or `<script defer>` snippets.
+    - **Quantify Impact:** Use `metric_savings_ms` for each audit to calculate `expected_gain_s`.
+    - **Threshold Targets:** State target values, e.g. "Reduce LCP to ≤1200 ms".
+    - **Platform‑Specific Tips:** If known, include stack advice, e.g. Next.js `next/image` or WordPress plugins.
+    - **Monitoring CI:** Optionally include a GitHub Action snippet:
+    ```yaml
+    - uses: treosh/lighthouse-ci-action@v5
+      with:
+        configPath: .lighthouserc.json
+    ```
+    - **Deterministic Scoring & Priority:** Identical input must always produce the same `overall_score`, `grade`, and audit `status` values.
+    """
     def analyze_url(self, url: str) -> Dict[str, Any]:
         """
         try:
             model = genai.GenerativeModel("gemini-2.0-flash")
+            prompt = f"""
+You are an **Expert Web Performance Analyst & Optimization Engineer**.
+Your task is to carefully analyze the provided PageSpeed Insights performance report.
+Extract **all** optimization recommendations and organize them into a JSON object with exactly these keys:
+  - "high"
+  - "medium"
+  - "low"
+  - "unknown"
+Each key’s value should be a list of suggestion strings.
+Classification Rules:
+1. **Audit Reference:** Cite the audit ID **and** full JSON path (e.g. `lighthouseResult.audits['unused-javascript'].details.items[0].url`).
+2. **Measurable Target:** Include the numeric goal (e.g., "Reduce LCP to ≤1200 ms").
+3. **Resource Context:** Embed the resource URL or file name when relevant.
+4. **Expected Savings:** Append expected savings in seconds (from `metric_savings_ms`).
+5. **Effort Estimate:** Add an effort estimate (e.g., "Effort: Medium (≈2 hrs)").
+6. **Code Snippet:** Provide a ready‑to‑copy snippet if applicable (e.g., `<img loading="lazy" src=...>`).
+7. **Category Tag:** Prefix with optimization domain `[Image]`, `[CSS]`, `[JS]`, `[Server]`.
+8. **Impact Score:** Append a simple impact rating (e.g., "Impact: ⭐⭐⭐☆☆" or "% of total savings").
+9. **Platform Tip:** If known, include stack‑specific advice (e.g., Next.js `next/image`).
+10. **Priority Classification:**
+   - High: Savings ≥ 1.5 seconds or score < 0.25
+   - Medium: Savings ≥ 0.5 and < 1.5 seconds or score ≥ 0.25 and ≤ 0.50
+   - Low: Savings < 0.5 seconds or score > 0.50 and ≤ 1.0
+   - Unknown: No savings or score data available
+Important:
+- Respond with *only* a valid JSON object.
+- Do NOT include any commentary or explanation outside the JSON.
+Performance Report:
+{report}
+"""
             response = model.generate_content(prompt)
             raw = (response.text or "").strip()