Spaces:

Hammad712
/

MAAS

Runtime error

App Files Files Community

Hammad712 committed on Jul 31, 2025

Commit

538360f

1 Parent(s): 76e4427

Updated content relevance and added keywords endpoint

Browse files

Files changed (16) hide show

app/content_relevence/__init__.py +0 -0
app/content_relevence/content_relevance_service.py +56 -132
app/content_relevence/models.py +36 -21
app/content_relevence/prompts.py +43 -0
app/content_relevence/routes.py +16 -6
app/keywords/keywords_service.py +10 -0
app/keywords/model.py +10 -0
app/keywords/prompt.py +41 -0
app/keywords/routes.py +14 -0
app/main.py +26 -0
app/page_speed/config.py +2 -0
app/rag/embeddings.py +2 -2
app/seo/models.py +22 -2
app/seo/prompts.py +136 -0
app/seo/seo_service.py +61 -219
requirements.txt +8 -7

app/content_relevence/__init__.py ADDED Viewed

File without changes

app/content_relevence/content_relevance_service.py CHANGED Viewed

@@ -1,162 +1,86 @@
-# content_relevance_service.py
 """
-Business logic service for Content Relevance analysis.
 """
-import json
 import logging
-import google.generativeai as genai
 from typing import Dict, Any
 from app.page_speed.config import settings
-# Create a module-level logger
 glogger = logging.getLogger(__name__)
 class ContentRelevanceService:
     """
-    Service class for generating Content Relevance reports via Gemini AI.
     """
     def __init__(self):
-        self.gemini_api_key = settings.gemini_api_key
-        if self.gemini_api_key:
-            glogger.info("Configuring Gemini AI for Content Relevance reporting.")
-            genai.configure(api_key=self.gemini_api_key)
-        else:
-            glogger.warning("No Gemini API key found. Reporting will fail if called.")
     def generate_content_relevance_report(self, data: Dict[str, Any]) -> str:
         """
-        Generate a Content Relevance report using Gemini AI.
         """
-        glogger.info("Starting Content Relevance report generation.")
         if not self.gemini_api_key:
-            glogger.error("Gemini API key not configured")
             raise Exception("Gemini API key not configured")
         try:
-            prompt = self._create_relevance_prompt(data)
-            glogger.debug("Relevance prompt: %s", prompt[:200])
-            response = genai.GenerativeModel("gemini-2.0-flash").generate_content(prompt)
-            text = getattr(response, "text", None)
             if not text:
-                glogger.error("Empty response from Gemini")
-                raise Exception("Empty response from Gemini")
             glogger.info("Content Relevance report generated successfully.")
             return text.strip()
         except Exception as e:
-            glogger.error("Error during report generation: %s", e, exc_info=True)
             raise
-    def _create_relevance_prompt(self, data: Dict[str, Any]) -> str:
-        """
-        Build the enhanced prompt for Content Relevance analysis, including benchmarks, examples, and impact estimates.
-        """
-        keywords = data.get('keywords', [])
-        keyword_list = ", ".join(keywords)
-        return f"""
-You are a **Content Strategy Expert**. Analyze the following content metrics and target keywords for relevance, coverage, and practical SEO impact. Provide a detailed report in Markdown, using structured sections do not add tables in the report, with the following enhancements:
-1. **Summary of Relevance**:
-   - Brief overview of alignment with keywords: {keyword_list}
-   - Overall Content Relevance Score: {data.get('contentRelevanceScore')} (out of 10)
-2. **Metric Breakdown**:
-   For each metric below, include:
-   - **Value** (from data)
-   - **Benchmark** (ideal or industry standard)
-   - **Status**: good / needs improvement / critical
-   - **Why It Matters**: concise rationale
-   - **Specific Example**: show where/how to improve (e.g., exact H1 text with keyword)
-   - **Expected Impact**: estimated uplift (e.g., `+5% relevance`)
-   - **Keyword Coverage Score**: {data.get('keywordCoverageScore')}
-   - **Density Score**: {data.get('densityScore')}% (ideal 1–3%)
-   - **Readability**: {data.get('readabilityScoreOutOf10')} / 10 (ideal ≥ 6)
-   - **Word Count**: {data.get('wordCount')} words (benchmark 1500–3000)
-   - **Media Richness**: Images = {data.get('imageCount')}, Videos = {data.get('videoCount')} (ideal ≥ 2 videos)
-3. **Top Strengths**:
-   - List top 3 areas where the actual values exceed benchmarks, referencing metric names and values.
-4. **Key Issues & Recommendations**:
-   For each of the top 3 issues, provide:
-   - **Issue**: name and value vs. benchmark
-   - **Actionable Fix**: code or content snippet example, e.g.:
-     ```html
-     <h1>{keywords[0].capitalize()} Services for Your Business</h1>
-     ```
-   - **Effort**: low / medium / high
-   - **Expected Impact**: e.g., `+10% coverage`, `+3 readability`
-5. **Priority Action Plan**:
-   - Top 5 actions, with columns: Priority (1–5), Action, Effort, Expected Impact.
-6. **Monitoring & Next Steps**:
-   - Weekly or monthly tracking recommendations
-7. **Bonus**: Suggest 2 related long-tail keywords to enhance depth.
-Make the report engaging, use code blocks, and bullet lists where appropriate. Do not output JSON—provide a human-readable Markdown report. and do not write anything outside the report format."""
-    def generate_content_priority(self, report: str) -> Dict[str, Any]:
         """
-        Generate prioritized content relevance recommendations based on the AI-generated report.
-        Args:
-            report (str): The Markdown-formatted content relevance report.
-        Returns:
-            Dict[str, Any]: Dictionary mapping priority levels to recommendation lists.
-        Raises:
-            Exception: If priority generation fails.
         """
-        glogger.info("Generating prioritized suggestions from the content relevance report.")
-        if not self.gemini_api_key:
-            msg = "Gemini API key not configured"
-            glogger.error(msg)
-            raise Exception(msg)
         try:
-            model = genai.GenerativeModel("gemini-2.0-flash")
-            prompt = f"""
-You are a **Content Strategy Expert**. Extract all actionable recommendations from the following content relevance report and organize them into a JSON object with keys: "high", "medium", "low".
-For each recommendation, include:
-- "recommendation": the action text
-- "impact": the expected impact (e.g. "+5% relevance")
-- "effort": low/medium/high
-Important:
-- Respond with *only* a valid JSON object.
-- Do NOT include any commentary or explanation outside the JSON.
- of t
-Report:
-{report}
-Respond with only a JSON object.
-"""
-            response = model.generate_content(prompt)
-            raw = (response.text or "").strip()
-            glogger.debug("Raw priority response: %s", raw[:200])
-            # Extract JSON
-            start = raw.find('{')
-            end = raw.rfind('}')
-            if start == -1 or end == -1 or end <= start:
-                raise ValueError("No JSON object found in response")
-            json_str = raw[start:end+1]
-            suggestions = json.loads(json_str)
-            if not isinstance(suggestions, dict):
-                raise ValueError("Parsed JSON is not a dictionary")
-            for key in ("high", "medium", "low", "unknown"):
-                suggestions.setdefault(key, [])
-            glogger.info("Priority suggestions generated successfully.")
-            return suggestions
-        except json.JSONDecodeError as je:
-            msg = f"Failed to parse JSON: {je}"
-            glogger.error(msg, exc_info=True)
-            raise Exception(msg)
         except Exception as e:
-            msg = f"Error generating content priority suggestions: {e}"
-            glogger.error(msg, exc_info=True)
-            raise

+# app/content_relevence/content_relevance_service.py
 """
+Business logic service for Content Relevance analysis and prioritization (mirroring SEOService).
 """
+import os
+import getpass
 import logging
 from typing import Dict, Any
 from app.page_speed.config import settings
+from app.content_relevence.models import Recommendation, PrioritySuggestions
+from app.content_relevence.prompts import ContentRelevancePrompts
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import PydanticOutputParser
+# Module-level logger
 glogger = logging.getLogger(__name__)
 class ContentRelevanceService:
     """
+    Service class for generating Content Relevance reports and prioritized suggestions via Gemini.
     """
     def __init__(self):
+        # configure Gemini key
+        key = settings.gemini_api_key or os.getenv("GEMINI_API_KEY")
+        if not key:
+            key = getpass.getpass("Enter your Gemini API key: ")
+        self.gemini_api_key = key
+        # initialize LangChain LLM wrapper
+        self.llm = ChatGoogleGenerativeAI(
+            model="gemini-2.5-flash",
+            temperature=0,
+            max_tokens=None,
+            timeout=None,
+            max_retries=3,
+            api_key=self.gemini_api_key
+        )
+        # Prompt template for raw report
+        self.report_prompt = ChatPromptTemplate.from_messages([
+            ("system", ContentRelevancePrompts.REPORT_PROMPT),
+            ("human", "{data}")
+        ])
+        # Prompt + parser for prioritized suggestions
+        self.parser = PydanticOutputParser(pydantic_object=Recommendation)
+        priority_template = ChatPromptTemplate.from_messages([
+            ("system", ContentRelevancePrompts.SYSTEM_PROMPT),
+            ("human", "{report}")
+        ]).partial(format_instructions=self.parser.get_format_instructions())
+        self.priority_chain = priority_template | self.llm | self.parser
     def generate_content_relevance_report(self, data: Dict[str, Any]) -> str:
         """
+        Generate a Markdown Content Relevance report.
         """
+        glogger.info("Starting Content Relevance report generation via llm.invoke.")
         if not self.gemini_api_key:
             raise Exception("Gemini API key not configured")
         try:
+            report = (self.report_prompt | self.llm).invoke({"data": data})
+            text = getattr(report, 'content', None) or getattr(report, 'text', None)
             if not text:
+                raise Exception("Empty response from Gemini via llm.invoke")
             glogger.info("Content Relevance report generated successfully.")
             return text.strip()
         except Exception as e:
+            glogger.error("Error generating content relevance report: %s", e, exc_info=True)
             raise
+    def generate_content_priority(self, report: str) -> PrioritySuggestions:
         """
+        Generate prioritized content relevance suggestions from a Markdown report.
         """
+        glogger.info("Generating prioritized content relevance suggestions via chain.invoke.")
         try:
+            rec: Recommendation = self.priority_chain.invoke({"report": report})
+            return rec.priority_suggestions
         except Exception as e:
+            glogger.error("Error generating content priority suggestions: %s", e, exc_info=True)
+            raise

app/content_relevence/models.py CHANGED Viewed

@@ -1,27 +1,42 @@
-# models.py
-from pydantic import BaseModel
-from typing import Any, Dict, List, Optional
-import logging
-# Optionally create a logger here if you need to log model-related events
-model_logger = logging.getLogger(__name__)
 class ContentRelevanceRequest(BaseModel):
-    data: Dict[str, Any]
-    def __init__(self, **data):
-        super().__init__(**data)
-        model_logger.debug("Initialized ContentRelevanceRequest with data: %s", self.data)
 class ContentRelevanceResponse(BaseModel):
-    success: bool
-    report: str
-    priorities: Dict[str, Any]
-    def __init__(self, **data):
-        super().__init__(**data)
-        model_logger.debug(
-            "Initialized ContentRelevanceResponse with success=%s, keys: %s",
-            self.success,
-            list(self.priorities.keys()) if self.priorities else []
-        )

+# app/content_relevence/models.py
+"""
+Pydantic models for Content Relevance requests and recommendations (mirroring SEO logic).
+"""
+from pydantic import BaseModel, Field
+from typing import Any, Dict, List
 class ContentRelevanceRequest(BaseModel):
+    """Payload for incoming content relevance data."""
+    data: Dict[str, Any] = Field(
+        ..., description="Raw metrics and keyword data for relevance analysis."
+    )
+class PrioritySuggestions(BaseModel):
+    """Categorized content relevance suggestions by effort level."""
+    high: List[str] = Field(
+        ..., description="High-effort content relevance suggestion strings."
+    )
+    medium: List[str] = Field(
+        ..., description="Medium-effort content relevance suggestion strings."
+    )
+    low: List[str] = Field(
+        ..., description="Low-effort content relevance suggestion strings."
+    )
+class Recommendation(BaseModel):
+    """Wrapper for prioritized content relevance suggestions."""
+    priority_suggestions: PrioritySuggestions = Field(
+        ..., description="All content relevance suggestions categorized by effort level."
+    )
 class ContentRelevanceResponse(BaseModel):
+    """Response model for the combined content relevance endpoint."""
+    success: bool = Field(..., description="Indicates if the operation was successful.")
+    report: str = Field(..., description="Markdown-formatted content relevance report.")
+    priorities: PrioritySuggestions = Field(
+        ..., description="Categorized priority suggestions."
+    )

app/content_relevence/prompts.py ADDED Viewed

	@@ -0,0 +1,43 @@

+# app/content_relevence/prompts.py
+"""
+Prompt templates for Content Relevance analysis services.
+"""
+class ContentRelevancePrompts:
+    """
+    Container for content relevance prompt templates.
+    """
+    SYSTEM_PROMPT = '''
+You are a **Content Strategy Expert**. Extract all actionable recommendations from the following content relevance report and organize them into a JSON object with keys: "high", "medium", "low".
+For each recommendation, include:
+- Plain-English sentence prefixed by a category tag (e.g. [Content]) and suffixed with (Effort Level: low|medium|high).
+Important:
+- Respond with *only* a valid JSON object.
+- Do NOT include any commentary or explanation outside the JSON.
+{format_instructions}
+Report:
+{report}
+'''
+    REPORT_PROMPT = '''
+You are a **Content Strategy Expert**. Analyze the following content metrics and target keywords for relevance, coverage, and practical SEO impact. Generate a detailed Markdown report with sections:
+- Overall Summary
+- Metric Breakdown
+- Top Strengths
+- Key Issues & Recommendations
+- Priority Action Plan
+- Monitoring & Next Steps
+- Bonus long-tail keyword suggestions
+Use bullet lists, headings, code blocks; do NOT output JSON.
+Data:
+{data}
+'''

app/content_relevence/routes.py CHANGED Viewed

@@ -1,8 +1,9 @@
-# routes.py
 from fastapi import APIRouter, HTTPException, Request
 import logging
-from .content_relevance_service import ContentRelevanceService
-from .models import ContentRelevanceRequest, ContentRelevanceResponse
 # Create a module-level logger
 router_logger = logging.getLogger(__name__)
@@ -11,7 +12,10 @@ router = APIRouter(prefix="/content-relevance", tags=["ContentRelevance"])
 service = ContentRelevanceService()
 @router.post("/report", response_model=ContentRelevanceResponse)
-async def generate_full_content_relevance(request: Request, payload: ContentRelevanceRequest):
     """
     Generate a full Content Relevance report and corresponding prioritized suggestions.
     """
@@ -25,8 +29,14 @@ async def generate_full_content_relevance(request: Request, payload: ContentRele
         priorities = service.generate_content_priority(report)
         router_logger.info("Priorities extracted successfully")
-        return ContentRelevanceResponse(success=True, report=report, priorities=priorities)
     except Exception as e:
-        router_logger.error("Error during content relevance processing: %s", e, exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))

+# app/content_relevence/routes.py
 from fastapi import APIRouter, HTTPException, Request
 import logging
+from app.content_relevence.content_relevance_service import ContentRelevanceService
+from app.content_relevence.models import ContentRelevanceRequest, ContentRelevanceResponse
 # Create a module-level logger
 router_logger = logging.getLogger(__name__)
 service = ContentRelevanceService()
 @router.post("/report", response_model=ContentRelevanceResponse)
+async def generate_full_content_relevance(
+    request: Request,
+    payload: ContentRelevanceRequest
+) -> ContentRelevanceResponse:
     """
     Generate a full Content Relevance report and corresponding prioritized suggestions.
     """
         priorities = service.generate_content_priority(report)
         router_logger.info("Priorities extracted successfully")
+        return ContentRelevanceResponse(
+            success=True,
+            report=report,
+            priorities=priorities
+        )
     except Exception as e:
+        router_logger.error(
+            "Error during content relevance processing: %s", e, exc_info=True
+        )
         raise HTTPException(status_code=500, detail=str(e))

app/keywords/keywords_service.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from .prompt import chain
+from .model import BusinessDescription, KeywordsResponse
+def generate_keywords_service(input_data: BusinessDescription) -> KeywordsResponse:
+    """Invoke the LangChain chain to generate keywords."""
+    result: KeywordsResponse = chain.invoke({
+        "business_description": input_data.description
+    })
+    return result

app/keywords/model.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from pydantic import BaseModel, Field
+from typing import List
+class BusinessDescription(BaseModel):
+    description: str = Field(..., description="The business description to base keywords on.")
+class KeywordsResponse(BaseModel):
+    keywords: List[str] = Field(
+        ..., description="A list of relevant keywords generated from the business description."
+    )

app/keywords/prompt.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import os
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import PydanticOutputParser
+from .model import KeywordsResponse
+# Initialize LLM
+GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
+if not GOOGLE_API_KEY:
+    raise EnvironmentError("GOOGLE_API_KEY not set in environment variables")
+llm = ChatGoogleGenerativeAI(
+    model="gemini-2.5-flash",
+    temperature=0.0,
+    max_tokens=500,
+    timeout=60,
+    max_retries=3,
+    api_key=GOOGLE_API_KEY
+)
+# Set up parser
+parser = PydanticOutputParser(pydantic_object=KeywordsResponse)
+# Build prompt
+prompt = ChatPromptTemplate.from_messages([
+    ("system", """
+You are an expert SEO strategist and content marketer.
+Generate the **top 10** most relevant keywords and key phrases
+that a business should target, based on the following description.
+**IMPORTANT**:
+- Return _only_ a JSON object with a single key, `keywords`.
+- The value must be an array of strings.
+- Do NOT include any markdown, bullet lists, commentary, or extra keys.
+{format_instructions}
+"""),
+    ("user", "{business_description}")
+]).partial(format_instructions=parser.get_format_instructions())
+# Compose chain
+chain = prompt | llm | parser

app/keywords/routes.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from fastapi import APIRouter, HTTPException
+from app.keywords.model import BusinessDescription, KeywordsResponse
+from app.keywords.keywords_service import generate_keywords_service
+router = APIRouter(prefix="/keywords", tags=["keywords"])
+@router.post("/generate", response_model=KeywordsResponse)
+async def generate_keywords(business: BusinessDescription):
+    try:
+        response = generate_keywords_service(business)
+        return response
+    except Exception as e:
+        # Log exception if you have logging set up
+        raise HTTPException(status_code=500, detail=str(e))

app/main.py CHANGED Viewed

@@ -16,6 +16,29 @@ from app.rag.routes import router as rag_router
 from app.seo import routes as seo_routes
 from app.page_speed import routes as page_speed_routes
 from app.content_relevence import routes as content_relevance_routes
 # ------------------------
 # Configure root logger
@@ -64,6 +87,9 @@ app.include_router(content_relevance_routes.router)
 # Mount PageSpeed router
 app.include_router(page_speed_routes.router)
 # Add CORS middleware
 app.add_middleware(
     CORSMiddleware,

 from app.seo import routes as seo_routes
 from app.page_speed import routes as page_speed_routes
 from app.content_relevence import routes as content_relevance_routes
+from app.keywords.routes import router as keywords_router
+# app/suppress_warnings.py
+import warnings
+# Suppress Pydantic config change warning
+warnings.filterwarnings(
+    "ignore",
+    message="Valid config keys have changed in V2:*",
+    category=UserWarning,
+    module="pydantic._internal._config",
+)
+# Suppress other optional warnings
+warnings.filterwarnings("ignore", category=FutureWarning)
+try:
+    from langchain_core._api.deprecation import LangChainDeprecationWarning
+    warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
+except ImportError:
+    pass
 # ------------------------
 # Configure root logger
 # Mount PageSpeed router
 app.include_router(page_speed_routes.router)
+# Mount the keywords router
+app.include_router(keywords_router)
 # Add CORS middleware
 app.add_middleware(
     CORSMiddleware,

app/page_speed/config.py CHANGED Viewed

@@ -8,6 +8,8 @@ class Settings(BaseSettings):
     # ───────────────────────────────────────────────────────────────────────────
     pagespeed_api_key: str
     gemini_api_key: str
     # ───────────────────────────────────────────────────────────────────────────
     # Chat & RAG Configuration

     # ───────────────────────────────────────────────────────────────────────────
     pagespeed_api_key: str
     gemini_api_key: str
+    google_api_key1: str
     # ───────────────────────────────────────────────────────────────────────────
     # Chat & RAG Configuration

app/rag/embeddings.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from dotenv import load_dotenv
@@ -40,6 +40,6 @@ login(HF_TOKEN)
 model_name = "BAAI/bge-small-en-v1.5"
 model_kwargs = {"device": "cpu"}
 encode_kwargs = {"normalize_embeddings": True}
-embeddings = HuggingFaceBgeEmbeddings(
     model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
 )

 import os
+from langchain_huggingface import HuggingFaceEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from dotenv import load_dotenv
 model_name = "BAAI/bge-small-en-v1.5"
 model_kwargs = {"device": "cpu"}
 encode_kwargs = {"normalize_embeddings": True}
+embeddings = HuggingFaceEmbeddings(
     model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
 )

app/seo/models.py CHANGED Viewed

@@ -1,5 +1,25 @@
-from pydantic import BaseModel
-from typing import Any, Dict
 class SEORequest(BaseModel):
     seo_data: Dict[str, Any]

+# app/seo/models.py
+"""
+Pydantic models for SEO requests and recommendations.
+"""
+from pydantic import BaseModel, Field
+from typing import Any, Dict, List
 class SEORequest(BaseModel):
+    """Payload for incoming SEO data."""
     seo_data: Dict[str, Any]
+class PrioritySuggestions(BaseModel):
+    """Categorized SEO suggestions by effort level."""
+    high: List[str] = Field(..., description="High-effort SEO suggestion strings.")
+    medium: List[str] = Field(..., description="Medium-effort SEO suggestion strings.")
+    low: List[str] = Field(..., description="Low-effort SEO suggestion strings.")
+class Recommendation(BaseModel):
+    """Wrapper for prioritized SEO suggestions."""
+    priority_suggestions: PrioritySuggestions = Field(
+        ..., description="All SEO suggestions categorized by effort level."
+    )

app/seo/prompts.py ADDED Viewed

	@@ -0,0 +1,136 @@

+"""
+Prompt templates for SEO analysis services.
+"""
+class SEOPrompts:
+    """
+    Container class for SEO-related prompt templates.
+    """
+    SYSTEM_PROMPT = """
+You are an **Expert Web Performance Analyst & Optimization Engineer**.
+Analyze the provided PageSpeed Insights performance report and extract **all** optimization recommendations.
+Return *only* a JSON object that has a single top-level key, `priority_suggestions`, whose value is an object containing exactly three lists:
+- `"high"`
+- `"medium"`
+- `"low"`
+Each list item must be a **plain-English sentence**, prefixed with its SEO category tag (e.g. `[On-Page]` or `[Schema]`), and suffixed with `(Effort Level: high|medium|low)`.
+{format_instructions}
+Performance Report:
+{report}
+        """
+    Report_PROMPT = """
+You are an **Expert SEO Consultant** with advanced knowledge of on-page, technical, and off-page SEO.
+Your task is to analyze this data and return a detailed SEO audit report as a **multi-line string** (not as JSON). Keep it structured, clear, and easy to read — for example, using sections, bullet points, and indentation.
+Include these sections in your output:
+---
+**Overall Summary**
+- Overall SEO Score: (0–100)
+- Grade: A, B, C, D, or F
+- Top Strengths: List the top 3–5 strong areas
+- Top Issues: List the top 3–5 weak/problematic areas
+---
+**Metric Breakdown**
+For each key metric in the data:
+- Metric Name
+- Value: ...
+- Benchmark: ...
+- Score: ...
+- Status: good / needs improvement / critical
+- Why It Matters: Explain simply
+- Recommendation: What to fix or improve
+---
+**Action Plan**
+List 5 weakest metrics and how to fix them:
+- Metric: ...
+  - Fix: ...
+  - Effort Level: low / medium / high
+---
+**Monitoring Strategy**
+- Frequency: weekly or monthly (based on severity of issues)
+- Methods: Tools or techniques to track progress
+---
+**Technical SEO**
+If data is available, include:
+- Core Web Vitals (LCP, FID, CLS)
+- Page Speed Score
+- Lazy Loading Enabled
+- Security Headers Present
+If not available, just write "Technical SEO data not available."
+---
+**Schema Markup**
+If available:
+- Types Detected
+- Is Valid: Yes/No
+Else: "Schema markup data not available."
+---
+**Backlink Profile**
+If available:
+- Referring Domains
+- Toxic Links
+- Recommendations to improve off-page SEO
+---
+**Trend Comparison**
+If available:
+- Previous Score
+- Score Change (increase, decrease, or no change)
+- Comment
+---
+### ⚙️ Scoring Rules Summary (for reference):
+- SEO Score: ≤50 = critical, 51–70 = needs improvement, >70 = good
+- Meta Title: 50–60 chars = good, else needs improvement
+- H1 Tags: exactly 1 = good, 0 or >1 = needs improvement/critical
+- Heading Errors: any = critical
+- Image Alt Tags: ≥90% = good, 50–89% = needs improvement, <50% = critical
+- sitemapXmlCheck / robotsTxtCheck: missing = critical
+- indexabilityCheck: false = critical
+- internalLinksCount: <5 = needs improvement
+- externalLinksCount: <2 = needs improvement
+Use these rules to calculate metric status and overall grade:
+- 90–100 → A
+- 80–89 → B
+- 70–79 → C
+- 60–69 → D
+- <60 → F
+Things to avoid while generating the report
+Don't:
+1- Do not write anything except the report
+2- Do not add anything in the start or end of the report
+3- Do not write text in the start of the report
+4- Do not write anything like this in the start that here is the report generated etc
+SEO data provided in JSON format:
+{seo_data}
+"""

app/seo/seo_service.py CHANGED Viewed

@@ -1,267 +1,109 @@
 """
 Business logic services for PageSpeed and SEO analysis.
 """
-import json
-import requests
 import logging
-import google.generativeai as genai
 from typing import Dict, Any
 from app.page_speed.config import settings
-# Create a module-level logger
 glogger = logging.getLogger(__name__)
 class SEOService:
     """
-    Service class for generating SEO reports via Gemini.
     """
     def __init__(self):
-        self.gemini_api_key = settings.gemini_api_key
-        if self.gemini_api_key:
-            glogger.info("Configuring Gemini AI for SEO reporting.")
-            genai.configure(api_key=self.gemini_api_key)
-        else:
-            glogger.warning("No Gemini API key found. SEO reporting will fail if called.")
     def generate_seo_report(self, seo_data: Dict[str, Any]) -> str:
         """
-        Generate an SEO audit report using Gemini AI.
         Args:
-            seo_data (Dict[str, Any]): Collected SEO metrics in JSON format.
         Returns:
-            str: JSON-formatted SEO report string
         Raises:
             Exception: If report generation fails
         """
-        glogger.info("Starting SEO report generation.")
         if not self.gemini_api_key:
             msg = "Gemini API key not configured"
             glogger.error(msg)
             raise Exception(msg)
-        prompt = self._create_seo_prompt(seo_data)
-        glogger.debug("SEO prompt: %s...", prompt[:200])
         try:
-            model = genai.GenerativeModel("gemini-2.0-flash")
-            response = model.generate_content(prompt)
-            text = getattr(response, "text", None)
-            if not text:
-                raise Exception("Empty response from Gemini")
             glogger.info("SEO report generated successfully.")
-            return text.strip()
         except Exception as e:
             msg = f"Error generating SEO report: {e}"
             glogger.error(msg, exc_info=True)
             raise
-    def _create_seo_prompt(self, seo_data: Dict[str, Any]) -> str:
-        """
-        Build the advanced prompt for SEO analysis based on the updated specialized template.
-        """
-        return f"""
-You are an **Expert SEO Consultant** with advanced knowledge of on-page, technical, and off-page SEO.
-Your task is to analyze this data and return a detailed SEO audit report as a **multi-line string** (not as JSON). Keep it structured, clear, and easy to read — for example, using sections, bullet points, and indentation.
-Include these sections in your output:
----
-**Overall Summary**
-- Overall SEO Score: (0–100)
-- Grade: A, B, C, D, or F
-- Top Strengths: List the top 3–5 strong areas
-- Top Issues: List the top 3–5 weak/problematic areas
----
-**Metric Breakdown**
-For each key metric in the data:
-- Metric Name
-- Value: ...
-- Benchmark: ...
-- Score: ...
-- Status: good / needs improvement / critical
-- Why It Matters: Explain simply
-- Recommendation: What to fix or improve
----
-**Action Plan**
-List 5 weakest metrics and how to fix them:
-- Metric: ...
-  - Fix: ...
-  - Effort Level: low / medium / high
----
-**Monitoring Strategy**
-- Frequency: weekly or monthly (based on severity of issues)
-- Methods: Tools or techniques to track progress
----
-**Technical SEO**
-If data is available, include:
-- Core Web Vitals (LCP, FID, CLS)
-- Page Speed Score
-- Lazy Loading Enabled
-- Security Headers Present
-If not available, just write “Technical SEO data not available.”
----
-**Schema Markup**
-If available:
-- Types Detected
-- Is Valid: Yes/No
-Else: “Schema markup data not available.”
----
-**Backlink Profile**
-If available:
-- Referring Domains
-- Toxic Links
-- Recommendations to improve off-page SEO
----
-**Trend Comparison**
-If available:
-- Previous Score
-- Score Change (increase, decrease, or no change)
-- Comment
----
-### ⚙️ Scoring Rules Summary (for reference):
-- SEO Score: ≤50 = critical, 51–70 = needs improvement, >70 = good
-- Meta Title: 50–60 chars = good, else needs improvement
-- H1 Tags: exactly 1 = good, 0 or >1 = needs improvement/critical
-- Heading Errors: any = critical
-- Image Alt Tags: ≥90% = good, 50–89% = needs improvement, <50% = critical
-- sitemapXmlCheck / robotsTxtCheck: missing = critical
-- indexabilityCheck: false = critical
-- internalLinksCount: <5 = needs improvement
-- externalLinksCount: <2 = needs improvement
-Use these rules to calculate metric status and overall grade:
-- 90–100 → A
-- 80–89 → B
-- 70–79 → C
-- 60–69 → D
-- <60 → F
-SEO data provided in JSON format:
-{seo_data}
-"""
-    def generate_seo_priority(self, report: str) -> Dict[str, Any]:
         """
-        Generate a dictionary of prioritized performance recommendations based on the Gemini-generated report.
         Args:
-            report (str): The Gemini-generated performance report
         Returns:
-            Dict[str, Any]: Dictionary mapping priority levels to optimization suggestions
-        Raises:
-            Exception: If the priority generation fails
         """
-        glogger.info("Generating prioritized suggestions from the Gemini report.")
-        if not self.gemini_api_key:
-            msg = "Gemini API key not configured"
-            glogger.error(msg)
-            raise Exception(msg)
         try:
-            model = genai.GenerativeModel("gemini-2.0-flash")
-            prompt = f"""
-You are an **Expert Web Performance Analyst & Optimization Engineer**.
-Your task is to carefully analyze the provided PageSpeed Insights performance report.
-Extract **all** optimization recommendations and organize them into a JSON object with exactly these keys:
-  - "high"
-  - "medium"
-  - "low"
-  - "unknown"
-Extract and organize the optimization recommendations from the following performance report
-into a JSON object with exactly these keys: \"high\", \"medium\", \"low\", and \"unknown\".
-Each key’s value should be a list of suggestion strings.
-Classification Rules:
-1. **Metric Reference:** For each suggestion, cite the metric name and full JSON path
-   (e.g. `metrics[2].name == "Keyword Density"` or `metrics[6].value`).
-2. **Benchmark Comparison:** Include both the **current value** and the **ideal benchmark**
-   (e.g. `"Current: 15 keywords, Ideal: 1–3% density"`).
-3. **Impact Estimate:** Quantify expected SEO impact (e.g. `"+12% CTR"` or `"+0.5 page rank score"`).
-4. **Code Snippet:** Provide a ready‑to‑copy example if applicable
-   (e.g. `<meta name="description" content="...">`).
-5. **Category Tag:** Prefix with SEO domain—
-   `[On-Page]`, `[Technical]`, `[Off-Page]`, `[Local]`, `[Schema]`.
-6. **Platform Tip:** If applicable, include CMS or framework advice
-   (e.g. `"WordPress: use Yoast SEO"`, `"Next.js: use next/head"`).
-7. **Priority Classification:**
-- **High:** Any metric with score `"critical"` or < 60, or impact ≥ 10%.
-- **Medium:** Score 60–79 or impact 5–9%.
-- **Low:** Score 80–100 or impact < 5%.
-- **Unknown:** No score or impact data available.
-8. Explain in easy english, avoiding technical jargon and explaination for technical terms.
-Important:
-- Respond with *only* a valid JSON object.
-- Do NOT include any commentary or explanation outside the JSON.
-Performance Report:
-{report}
-"""
-            response = model.generate_content(prompt)
-            raw = (response.text or "").strip()
-            glogger.debug("Raw priority response: %s", raw[:500] + ("…" if len(raw) > 500 else ""))
-            # Locate the JSON portion by finding the first '{' and the last '}'
-            start = raw.find('{')
-            end = raw.rfind('}')
-            if start == -1 or end == -1 or end <= start:
-                raise ValueError("No JSON object found in Gemini response")
-            json_str = raw[start:end+1]
-            glogger.debug("Extracted JSON string: %s", json_str)
-            suggestions = json.loads(json_str)
-            if not isinstance(suggestions, dict):
-                raise ValueError("Parsed JSON is not a dictionary")
-            # Ensure all expected keys exist
-            for key in ("high", "medium", "low", "unknown"):
-                suggestions.setdefault(key, [])
-            glogger.info("Priority suggestions generated successfully.")
-            return suggestions
-        except json.JSONDecodeError as je:
-            msg = f"Failed to parse JSON from Gemini response: {je}"
-            glogger.error(msg, exc_info=True)
-            raise Exception(msg)
         except Exception as e:
             msg = f"Error generating priority suggestions: {e}"
             glogger.error(msg, exc_info=True)
             raise

 """
 Business logic services for PageSpeed and SEO analysis.
 """
+import os
+import getpass
 import logging
 from typing import Dict, Any
 from app.page_speed.config import settings
+from app.seo.models import Recommendation, PrioritySuggestions
+from app.seo.prompts import SEOPrompts
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import PydanticOutputParser
+# Module-level logger
 glogger = logging.getLogger(__name__)
 class SEOService:
     """
+    Service class for generating SEO reports and prioritized suggestions via Gemini.
     """
     def __init__(self):
+        # configure Gemini key
+        key = settings.gemini_api_key or os.getenv("GEMINI_API_KEY")
+        if not key:
+            key = getpass.getpass("Enter your Gemini API key: ")
+        self.gemini_api_key = key
+        # initialize LangChain LLM wrapper
+        self.llm = ChatGoogleGenerativeAI(
+            model="gemini-2.5-flash",
+            temperature=0,
+            max_tokens=None,
+            timeout=None,
+            max_retries=3,
+            api_key=self.gemini_api_key
+        )
+        # Prompt template for raw SEO report
+        self.report_prompt = ChatPromptTemplate.from_messages([
+            ("system", SEOPrompts.Report_PROMPT),
+            ("human", "Please generate a comprehensive SEO audit report based on the following data:\n\n{seo_data}")
+        ])
+        # Prompt + parser for prioritized suggestions
+        self.parser = PydanticOutputParser(pydantic_object=Recommendation)
+        self.priority_chain = (
+            ChatPromptTemplate.from_messages([
+                ("system", SEOPrompts.SYSTEM_PROMPT),
+                ("human", "{report}")
+            ]).partial(format_instructions=self.parser.get_format_instructions())
+            | self.llm
+            | self.parser
+        )
     def generate_seo_report(self, seo_data: Dict[str, Any]) -> str:
         """
+        Generate an SEO audit report using Gemini AI via llm.invoke.
         Args:
+            seo_data (Dict[str, Any]): Collected SEO metrics in JSON-serializable format.
         Returns:
+            str: Raw text SEO report
         Raises:
             Exception: If report generation fails
         """
+        glogger.info("Starting SEO report generation via llm.invoke.")
         if not self.gemini_api_key:
             msg = "Gemini API key not configured"
             glogger.error(msg)
             raise Exception(msg)
+        prompt_input = {"seo_data": seo_data}
+        glogger.debug("Invoking LLM for SEO report with data keys: %s", list(seo_data.keys()))
         try:
+            # llm.invoke returns the raw string response
+            report_text: str = self.report_prompt | self.llm
+            report = report_text.invoke(prompt_input)
+            if not report:
+                raise Exception("Empty response from Gemini via llm.invoke")
             glogger.info("SEO report generated successfully.")
+            return report.content.strip()
         except Exception as e:
             msg = f"Error generating SEO report: {e}"
             glogger.error(msg, exc_info=True)
             raise
+    def generate_seo_priority(self, report: str) -> PrioritySuggestions:
         """
+        Generate prioritized SEO suggestions from a report via chain.invoke.
         Args:
+            report (str): SEO report content
         Returns:
+            PrioritySuggestions: Parsed, prioritized recommendations
         """
+        glogger.info("Generating prioritized SEO suggestions via chain.invoke.")
         try:
+            rec: Recommendation = self.priority_chain.invoke({"report": report})
+            return rec.priority_suggestions
         except Exception as e:
             msg = f"Error generating priority suggestions: {e}"
             glogger.error(msg, exc_info=True)
             raise

requirements.txt CHANGED Viewed

@@ -1,9 +1,9 @@
-fastapi==0.104.1
-uvicorn==0.24.0
-python-dotenv==1.0.0
-requests==2.31.0
-google-generativeai==0.3.2
-pydantic==2.5.0
 pydantic_settings
 langchain_groq
 langchain_community
@@ -11,5 +11,6 @@ faiss-cpu
 pymongo
 langchain-mongodb
 huggingface_hub
-python_dotenv
 sentence_transformers

+fastapi
+uvicorn
+python-dotenv
+requests
+google-generativeai
+pydantic
 pydantic_settings
 langchain_groq
 langchain_community
 pymongo
 langchain-mongodb
 huggingface_hub
 sentence_transformers
+langchain_google_genai
+langchain_huggingface