Hammad712 committed on
Commit
538360f
·
1 Parent(s): 76e4427

Updated content relevance and added keywords endpoint

Browse files
app/content_relevence/__init__.py ADDED
File without changes
app/content_relevence/content_relevance_service.py CHANGED
@@ -1,162 +1,86 @@
1
- # content_relevance_service.py
2
  """
3
- Business logic service for Content Relevance analysis.
4
  """
5
- import json
 
6
  import logging
7
- import google.generativeai as genai
8
  from typing import Dict, Any
 
9
  from app.page_speed.config import settings
 
 
 
 
 
 
10
 
11
- # Create a module-level logger
12
  glogger = logging.getLogger(__name__)
13
 
 
14
  class ContentRelevanceService:
15
  """
16
- Service class for generating Content Relevance reports via Gemini AI.
17
  """
18
  def __init__(self):
19
- self.gemini_api_key = settings.gemini_api_key
20
- if self.gemini_api_key:
21
- glogger.info("Configuring Gemini AI for Content Relevance reporting.")
22
- genai.configure(api_key=self.gemini_api_key)
23
- else:
24
- glogger.warning("No Gemini API key found. Reporting will fail if called.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def generate_content_relevance_report(self, data: Dict[str, Any]) -> str:
27
  """
28
- Generate a Content Relevance report using Gemini AI.
29
  """
30
- glogger.info("Starting Content Relevance report generation.")
31
  if not self.gemini_api_key:
32
- glogger.error("Gemini API key not configured")
33
  raise Exception("Gemini API key not configured")
34
 
35
  try:
36
- prompt = self._create_relevance_prompt(data)
37
- glogger.debug("Relevance prompt: %s", prompt[:200])
38
- response = genai.GenerativeModel("gemini-2.0-flash").generate_content(prompt)
39
- text = getattr(response, "text", None)
40
  if not text:
41
- glogger.error("Empty response from Gemini")
42
- raise Exception("Empty response from Gemini")
43
  glogger.info("Content Relevance report generated successfully.")
44
  return text.strip()
45
  except Exception as e:
46
- glogger.error("Error during report generation: %s", e, exc_info=True)
47
  raise
48
 
49
- def _create_relevance_prompt(self, data: Dict[str, Any]) -> str:
50
- """
51
- Build the enhanced prompt for Content Relevance analysis, including benchmarks, examples, and impact estimates.
52
- """
53
- keywords = data.get('keywords', [])
54
- keyword_list = ", ".join(keywords)
55
- return f"""
56
- You are a **Content Strategy Expert**. Analyze the following content metrics and target keywords for relevance, coverage, and practical SEO impact. Provide a detailed report in Markdown, using structured sections do not add tables in the report, with the following enhancements:
57
-
58
- 1. **Summary of Relevance**:
59
- - Brief overview of alignment with keywords: {keyword_list}
60
- - Overall Content Relevance Score: {data.get('contentRelevanceScore')} (out of 10)
61
-
62
- 2. **Metric Breakdown**:
63
- For each metric below, include:
64
- - **Value** (from data)
65
- - **Benchmark** (ideal or industry standard)
66
- - **Status**: good / needs improvement / critical
67
- - **Why It Matters**: concise rationale
68
- - **Specific Example**: show where/how to improve (e.g., exact H1 text with keyword)
69
- - **Expected Impact**: estimated uplift (e.g., `+5% relevance`)
70
-
71
- - **Keyword Coverage Score**: {data.get('keywordCoverageScore')}
72
- - **Density Score**: {data.get('densityScore')}% (ideal 1–3%)
73
- - **Readability**: {data.get('readabilityScoreOutOf10')} / 10 (ideal ≥ 6)
74
- - **Word Count**: {data.get('wordCount')} words (benchmark 1500–3000)
75
- - **Media Richness**: Images = {data.get('imageCount')}, Videos = {data.get('videoCount')} (ideal ≥ 2 videos)
76
-
77
- 3. **Top Strengths**:
78
- - List top 3 areas where the actual values exceed benchmarks, referencing metric names and values.
79
-
80
- 4. **Key Issues & Recommendations**:
81
- For each of the top 3 issues, provide:
82
- - **Issue**: name and value vs. benchmark
83
- - **Actionable Fix**: code or content snippet example, e.g.:
84
- ```html
85
- <h1>{keywords[0].capitalize()} Services for Your Business</h1>
86
- ```
87
- - **Effort**: low / medium / high
88
- - **Expected Impact**: e.g., `+10% coverage`, `+3 readability`
89
-
90
- 5. **Priority Action Plan**:
91
- - Top 5 actions, with columns: Priority (1–5), Action, Effort, Expected Impact.
92
-
93
- 6. **Monitoring & Next Steps**:
94
- - Weekly or monthly tracking recommendations
95
-
96
- 7. **Bonus**: Suggest 2 related long-tail keywords to enhance depth.
97
-
98
- Make the report engaging, use code blocks, and bullet lists where appropriate. Do not output JSON—provide a human-readable Markdown report. and do not write anything outside the report format."""
99
-
100
-
101
- def generate_content_priority(self, report: str) -> Dict[str, Any]:
102
  """
103
- Generate prioritized content relevance recommendations based on the AI-generated report.
104
-
105
- Args:
106
- report (str): The Markdown-formatted content relevance report.
107
-
108
- Returns:
109
- Dict[str, Any]: Dictionary mapping priority levels to recommendation lists.
110
-
111
- Raises:
112
- Exception: If priority generation fails.
113
  """
114
- glogger.info("Generating prioritized suggestions from the content relevance report.")
115
- if not self.gemini_api_key:
116
- msg = "Gemini API key not configured"
117
- glogger.error(msg)
118
- raise Exception(msg)
119
  try:
120
- model = genai.GenerativeModel("gemini-2.0-flash")
121
- prompt = f"""
122
- You are a **Content Strategy Expert**. Extract all actionable recommendations from the following content relevance report and organize them into a JSON object with keys: "high", "medium", "low".
123
-
124
- For each recommendation, include:
125
- - "recommendation": the action text
126
- - "impact": the expected impact (e.g. "+5% relevance")
127
- - "effort": low/medium/high
128
-
129
- Important:
130
- - Respond with *only* a valid JSON object.
131
- - Do NOT include any commentary or explanation outside the JSON.
132
- of t
133
-
134
- Report:
135
- {report}
136
-
137
- Respond with only a JSON object.
138
- """
139
- response = model.generate_content(prompt)
140
- raw = (response.text or "").strip()
141
- glogger.debug("Raw priority response: %s", raw[:200])
142
- # Extract JSON
143
- start = raw.find('{')
144
- end = raw.rfind('}')
145
- if start == -1 or end == -1 or end <= start:
146
- raise ValueError("No JSON object found in response")
147
- json_str = raw[start:end+1]
148
- suggestions = json.loads(json_str)
149
- if not isinstance(suggestions, dict):
150
- raise ValueError("Parsed JSON is not a dictionary")
151
- for key in ("high", "medium", "low", "unknown"):
152
- suggestions.setdefault(key, [])
153
- glogger.info("Priority suggestions generated successfully.")
154
- return suggestions
155
- except json.JSONDecodeError as je:
156
- msg = f"Failed to parse JSON: {je}"
157
- glogger.error(msg, exc_info=True)
158
- raise Exception(msg)
159
  except Exception as e:
160
- msg = f"Error generating content priority suggestions: {e}"
161
- glogger.error(msg, exc_info=True)
162
- raise
 
1
+ # app/content_relevence/content_relevance_service.py
2
  """
3
+ Business logic service for Content Relevance analysis and prioritization (mirroring SEOService).
4
  """
5
+ import os
6
+ import getpass
7
  import logging
 
8
  from typing import Dict, Any
9
+
10
  from app.page_speed.config import settings
11
+ from app.content_relevence.models import Recommendation, PrioritySuggestions
12
+ from app.content_relevence.prompts import ContentRelevancePrompts
13
+
14
+ from langchain_google_genai import ChatGoogleGenerativeAI
15
+ from langchain_core.prompts import ChatPromptTemplate
16
+ from langchain_core.output_parsers import PydanticOutputParser
17
 
18
+ # Module-level logger
19
  glogger = logging.getLogger(__name__)
20
 
21
+
22
class ContentRelevanceService:
    """
    Service class for generating Content Relevance reports and prioritized suggestions via Gemini.

    Construction resolves the Gemini API key and builds the
    ``ChatGoogleGenerativeAI`` client, the report prompt template, and the
    prompt -> LLM -> parser chain used for prioritized suggestions.
    """

    def __init__(self):
        """
        Resolve the API key and build the LLM client, prompts and chain.

        Raises:
            EnvironmentError: If no key is configured and there is no
                interactive terminal on which to ask for one.
        """
        # Function-scope import: ``sys`` is only needed for the TTY check.
        import sys

        # Configure Gemini key: settings first, then the environment.
        key = settings.gemini_api_key or os.getenv("GEMINI_API_KEY")
        if not key:
            # Only prompt when a human can answer. The service is created at
            # import time by the routes module, so an unconditional prompt
            # would stall a server started without a terminal.
            stdin = sys.stdin
            if stdin is not None and stdin.isatty():
                key = getpass.getpass("Enter your Gemini API key: ")
        if not key:
            raise EnvironmentError(
                "Gemini API key not configured: set GEMINI_API_KEY "
                "or settings.gemini_api_key"
            )
        self.gemini_api_key = key

        # Initialize LangChain LLM wrapper.
        # NOTE(review): timeout=None means no request timeout is passed here,
        # while app/keywords/prompt.py uses timeout=60 - confirm intended.
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-2.5-flash",
            temperature=0,
            max_tokens=None,
            timeout=None,
            max_retries=3,
            api_key=self.gemini_api_key,
        )

        # Prompt template for the raw Markdown report.
        # NOTE(review): REPORT_PROMPT already contains a {data} placeholder
        # and the human message repeats {data}, so the payload is rendered
        # twice (same for {report} below) - confirm this is intended.
        self.report_prompt = ChatPromptTemplate.from_messages([
            ("system", ContentRelevancePrompts.REPORT_PROMPT),
            ("human", "{data}"),
        ])

        # Prompt + parser for prioritized suggestions. The parser's format
        # instructions are bound once so callers only supply ``report``.
        self.parser = PydanticOutputParser(pydantic_object=Recommendation)
        priority_template = ChatPromptTemplate.from_messages([
            ("system", ContentRelevancePrompts.SYSTEM_PROMPT),
            ("human", "{report}"),
        ]).partial(format_instructions=self.parser.get_format_instructions())
        self.priority_chain = priority_template | self.llm | self.parser

    @staticmethod
    def _extract_text(message: Any) -> str:
        """
        Return the stripped text carried by an LLM response message.

        Reads ``message.content`` first. A string is used as-is; a list is
        flattened by joining its string items and the ``"text"`` value of its
        dict items. If that yields nothing, ``message.text`` is tried, and is
        called first when it is callable. Returns ``""`` when no text is found.
        """
        content = getattr(message, "content", None)
        if isinstance(content, str):
            text = content
        elif isinstance(content, list):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            text = "".join(pieces)
        else:
            text = ""

        if not text.strip():
            fallback = getattr(message, "text", None)
            if callable(fallback):
                fallback = fallback()
            text = fallback if isinstance(fallback, str) else ""
        return text.strip()

    def generate_content_relevance_report(self, data: Dict[str, Any]) -> str:
        """
        Generate a Markdown Content Relevance report.

        Args:
            data: Metrics/keyword payload substituted into the report prompt.

        Returns:
            The report text with surrounding whitespace removed.

        Raises:
            Exception: If no API key is set, the model returns no text, or
                the underlying chain call fails (logged, then re-raised).
        """
        glogger.info("Starting Content Relevance report generation via llm.invoke.")
        if not self.gemini_api_key:
            raise Exception("Gemini API key not configured")

        try:
            message = (self.report_prompt | self.llm).invoke({"data": data})
            text = self._extract_text(message)
            if not text:
                raise Exception("Empty response from Gemini via llm.invoke")
            glogger.info("Content Relevance report generated successfully.")
            return text
        except Exception as e:
            glogger.error("Error generating content relevance report: %s", e, exc_info=True)
            raise

    def generate_content_priority(self, report: str) -> PrioritySuggestions:
        """
        Generate prioritized content relevance suggestions from a Markdown report.

        Args:
            report: The Markdown report passed to the priority chain.

        Returns:
            The ``priority_suggestions`` field of the parsed ``Recommendation``.

        Raises:
            Exception: Any error from the chain or the output parser is
                logged and re-raised unchanged.
        """
        glogger.info("Generating prioritized content relevance suggestions via chain.invoke.")
        try:
            rec: Recommendation = self.priority_chain.invoke({"report": report})
            return rec.priority_suggestions
        except Exception as e:
            glogger.error("Error generating content priority suggestions: %s", e, exc_info=True)
            raise
 
app/content_relevence/models.py CHANGED
@@ -1,27 +1,42 @@
1
- # models.py
2
- from pydantic import BaseModel
3
- from typing import Any, Dict, List, Optional
4
- import logging
 
 
5
 
6
- # Optionally create a logger here if you need to log model-related events
7
- model_logger = logging.getLogger(__name__)
8
 
9
  class ContentRelevanceRequest(BaseModel):
10
- data: Dict[str, Any]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- def __init__(self, **data):
13
- super().__init__(**data)
14
- model_logger.debug("Initialized ContentRelevanceRequest with data: %s", self.data)
15
 
16
  class ContentRelevanceResponse(BaseModel):
17
- success: bool
18
- report: str
19
- priorities: Dict[str, Any]
20
-
21
- def __init__(self, **data):
22
- super().__init__(**data)
23
- model_logger.debug(
24
- "Initialized ContentRelevanceResponse with success=%s, keys: %s",
25
- self.success,
26
- list(self.priorities.keys()) if self.priorities else []
27
- )
 
1
+ # app/content_relevence/models.py
2
+ """
3
+ Pydantic models for Content Relevance requests and recommendations (mirroring SEO logic).
4
+ """
5
+ from pydantic import BaseModel, Field
6
+ from typing import Any, Dict, List
7
 
 
 
8
 
9
  class ContentRelevanceRequest(BaseModel):
10
+ """Payload for incoming content relevance data."""
11
+ data: Dict[str, Any] = Field(
12
+ ..., description="Raw metrics and keyword data for relevance analysis."
13
+ )
14
+
15
+
16
+ class PrioritySuggestions(BaseModel):
17
+ """Categorized content relevance suggestions by effort level."""
18
+ high: List[str] = Field(
19
+ ..., description="High-effort content relevance suggestion strings."
20
+ )
21
+ medium: List[str] = Field(
22
+ ..., description="Medium-effort content relevance suggestion strings."
23
+ )
24
+ low: List[str] = Field(
25
+ ..., description="Low-effort content relevance suggestion strings."
26
+ )
27
+
28
+
29
+ class Recommendation(BaseModel):
30
+ """Wrapper for prioritized content relevance suggestions."""
31
+ priority_suggestions: PrioritySuggestions = Field(
32
+ ..., description="All content relevance suggestions categorized by effort level."
33
+ )
34
 
 
 
 
35
 
36
  class ContentRelevanceResponse(BaseModel):
37
+ """Response model for the combined content relevance endpoint."""
38
+ success: bool = Field(..., description="Indicates if the operation was successful.")
39
+ report: str = Field(..., description="Markdown-formatted content relevance report.")
40
+ priorities: PrioritySuggestions = Field(
41
+ ..., description="Categorized priority suggestions."
42
+ )
 
 
 
 
 
app/content_relevence/prompts.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/content_relevence/prompts.py
2
+ """
3
+ Prompt templates for Content Relevance analysis services.
4
+ """
5
+
6
+ class ContentRelevancePrompts:
7
+ """
8
+ Container for content relevance prompt templates.
9
+ """
10
+
11
+ SYSTEM_PROMPT = '''
12
+ You are a **Content Strategy Expert**. Extract all actionable recommendations from the following content relevance report and organize them into a JSON object with keys: "high", "medium", "low".
13
+
14
+ For each recommendation, include:
15
+ - Plain-English sentence prefixed by a category tag (e.g. [Content]) and suffixed with (Effort Level: low|medium|high).
16
+
17
+ Important:
18
+ - Respond with *only* a valid JSON object.
19
+ - Do NOT include any commentary or explanation outside the JSON.
20
+
21
+ {format_instructions}
22
+
23
+ Report:
24
+ {report}
25
+
26
+ '''
27
+
28
+ REPORT_PROMPT = '''
29
+ You are a **Content Strategy Expert**. Analyze the following content metrics and target keywords for relevance, coverage, and practical SEO impact. Generate a detailed Markdown report with sections:
30
+
31
+ - Overall Summary
32
+ - Metric Breakdown
33
+ - Top Strengths
34
+ - Key Issues & Recommendations
35
+ - Priority Action Plan
36
+ - Monitoring & Next Steps
37
+ - Bonus long-tail keyword suggestions
38
+
39
+ Use bullet lists, headings, code blocks; do NOT output JSON.
40
+
41
+ Data:
42
+ {data}
43
+ '''
app/content_relevence/routes.py CHANGED
@@ -1,8 +1,9 @@
1
- # routes.py
2
  from fastapi import APIRouter, HTTPException, Request
3
  import logging
4
- from .content_relevance_service import ContentRelevanceService
5
- from .models import ContentRelevanceRequest, ContentRelevanceResponse
 
6
 
7
  # Create a module-level logger
8
  router_logger = logging.getLogger(__name__)
@@ -11,7 +12,10 @@ router = APIRouter(prefix="/content-relevance", tags=["ContentRelevance"])
11
  service = ContentRelevanceService()
12
 
13
  @router.post("/report", response_model=ContentRelevanceResponse)
14
- async def generate_full_content_relevance(request: Request, payload: ContentRelevanceRequest):
 
 
 
15
  """
16
  Generate a full Content Relevance report and corresponding prioritized suggestions.
17
  """
@@ -25,8 +29,14 @@ async def generate_full_content_relevance(request: Request, payload: ContentRele
25
  priorities = service.generate_content_priority(report)
26
  router_logger.info("Priorities extracted successfully")
27
 
28
- return ContentRelevanceResponse(success=True, report=report, priorities=priorities)
 
 
 
 
29
 
30
  except Exception as e:
31
- router_logger.error("Error during content relevance processing: %s", e, exc_info=True)
 
 
32
  raise HTTPException(status_code=500, detail=str(e))
 
1
+ # app/content_relevence/routes.py
2
  from fastapi import APIRouter, HTTPException, Request
3
  import logging
4
+
5
+ from app.content_relevence.content_relevance_service import ContentRelevanceService
6
+ from app.content_relevence.models import ContentRelevanceRequest, ContentRelevanceResponse
7
 
8
  # Create a module-level logger
9
  router_logger = logging.getLogger(__name__)
 
12
  service = ContentRelevanceService()
13
 
14
  @router.post("/report", response_model=ContentRelevanceResponse)
15
+ async def generate_full_content_relevance(
16
+ request: Request,
17
+ payload: ContentRelevanceRequest
18
+ ) -> ContentRelevanceResponse:
19
  """
20
  Generate a full Content Relevance report and corresponding prioritized suggestions.
21
  """
 
29
  priorities = service.generate_content_priority(report)
30
  router_logger.info("Priorities extracted successfully")
31
 
32
+ return ContentRelevanceResponse(
33
+ success=True,
34
+ report=report,
35
+ priorities=priorities
36
+ )
37
 
38
  except Exception as e:
39
+ router_logger.error(
40
+ "Error during content relevance processing: %s", e, exc_info=True
41
+ )
42
  raise HTTPException(status_code=500, detail=str(e))
app/keywords/keywords_service.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from .prompt import chain
2
+ from .model import BusinessDescription, KeywordsResponse
3
+
4
+
5
def generate_keywords_service(input_data: BusinessDescription) -> KeywordsResponse:
    """Run the keyword chain on a business description.

    Passes ``input_data.description`` to the module's ``chain`` under the
    ``business_description`` key and returns whatever the chain produces.
    """
    chain_inputs = {"business_description": input_data.description}
    return chain.invoke(chain_inputs)
app/keywords/model.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List
3
+
4
+ class BusinessDescription(BaseModel):
5
+ description: str = Field(..., description="The business description to base keywords on.")
6
+
7
+ class KeywordsResponse(BaseModel):
8
+ keywords: List[str] = Field(
9
+ ..., description="A list of relevant keywords generated from the business description."
10
+ )
app/keywords/prompt.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from langchain_core.prompts import ChatPromptTemplate
4
+ from langchain_core.output_parsers import PydanticOutputParser
5
+ from .model import KeywordsResponse
6
+
7
+ # Initialize LLM
8
# The key is read from GEMINI_API_KEY; the error message names that same
# variable so an operator knows exactly which one to set.
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
if not GOOGLE_API_KEY:
    raise EnvironmentError("GEMINI_API_KEY not set in environment variables")
11
+
12
+ llm = ChatGoogleGenerativeAI(
13
+ model="gemini-2.5-flash",
14
+ temperature=0.0,
15
+ max_tokens=500,
16
+ timeout=60,
17
+ max_retries=3,
18
+ api_key=GOOGLE_API_KEY
19
+ )
20
+
21
+ # Set up parser
22
+ parser = PydanticOutputParser(pydantic_object=KeywordsResponse)
23
+
24
+ # Build prompt
25
+ prompt = ChatPromptTemplate.from_messages([
26
+ ("system", """
27
+ You are an expert SEO strategist and content marketer.
28
+ Generate the **top 10** most relevant keywords and key phrases
29
+ that a business should target, based on the following description.
30
+
31
+ **IMPORTANT**:
32
+ - Return _only_ a JSON object with a single key, `keywords`.
33
+ - The value must be an array of strings.
34
+ - Do NOT include any markdown, bullet lists, commentary, or extra keys.
35
+ {format_instructions}
36
+ """),
37
+ ("user", "{business_description}")
38
+ ]).partial(format_instructions=parser.get_format_instructions())
39
+
40
+ # Compose chain
41
+ chain = prompt | llm | parser
app/keywords/routes.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from app.keywords.model import BusinessDescription, KeywordsResponse
3
+ from app.keywords.keywords_service import generate_keywords_service
4
+
5
+ router = APIRouter(prefix="/keywords", tags=["keywords"])
6
+
7
@router.post("/generate", response_model=KeywordsResponse)
async def generate_keywords(business: BusinessDescription):
    """Generate target keywords for a business description.

    Delegates to ``generate_keywords_service`` and returns its result.

    Raises:
        HTTPException: Status 500, with the original error text as
            ``detail``, when the service call raises.
    """
    # Function-scope import so the handler does not rely on a module-level
    # logger that this file does not define.
    import logging

    try:
        # NOTE(review): generate_keywords_service is a plain ``def``, so the
        # LLM call runs inline in this async handler - confirm that blocking
        # here is acceptable or move it to a worker thread.
        return generate_keywords_service(business)
    except Exception as e:
        # Record the failure with its traceback before converting it into
        # an HTTP error; otherwise the cause is lost to operators.
        logging.getLogger(__name__).error(
            "Error during keyword generation: %s", e, exc_info=True
        )
        raise HTTPException(status_code=500, detail=str(e)) from e
app/main.py CHANGED
@@ -16,6 +16,29 @@ from app.rag.routes import router as rag_router
16
  from app.seo import routes as seo_routes
17
  from app.page_speed import routes as page_speed_routes
18
  from app.content_relevence import routes as content_relevance_routes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  # ------------------------
21
  # Configure root logger
@@ -64,6 +87,9 @@ app.include_router(content_relevance_routes.router)
64
  # Mount PageSpeed router
65
  app.include_router(page_speed_routes.router)
66
 
 
 
 
67
  # Add CORS middleware
68
  app.add_middleware(
69
  CORSMiddleware,
 
16
  from app.seo import routes as seo_routes
17
  from app.page_speed import routes as page_speed_routes
18
  from app.content_relevence import routes as content_relevance_routes
19
+ from app.keywords.routes import router as keywords_router
20
+
21
+
22
+ # app/suppress_warnings.py
23
+
24
+ import warnings
25
+
26
+ # Suppress Pydantic config change warning
27
+ warnings.filterwarnings(
28
+ "ignore",
29
+ message="Valid config keys have changed in V2:*",
30
+ category=UserWarning,
31
+ module="pydantic._internal._config",
32
+ )
33
+
34
+ # Suppress other optional warnings
35
+ warnings.filterwarnings("ignore", category=FutureWarning)
36
+ try:
37
+ from langchain_core._api.deprecation import LangChainDeprecationWarning
38
+ warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
39
+ except ImportError:
40
+ pass
41
+
42
 
43
  # ------------------------
44
  # Configure root logger
 
87
  # Mount PageSpeed router
88
  app.include_router(page_speed_routes.router)
89
 
90
+ # Mount the keywords router
91
+ app.include_router(keywords_router)
92
+
93
  # Add CORS middleware
94
  app.add_middleware(
95
  CORSMiddleware,
app/page_speed/config.py CHANGED
@@ -8,6 +8,8 @@ class Settings(BaseSettings):
8
  # ───────────────────────────────────────────────────────────────────────────
9
  pagespeed_api_key: str
10
  gemini_api_key: str
 
 
11
 
12
  # ───────────────────────────────────────────────────────────────────────────
13
  # Chat & RAG Configuration
 
8
  # ───────────────────────────────────────────────────────────────────────────
9
  pagespeed_api_key: str
10
  gemini_api_key: str
11
+ google_api_key1: str
12
+
13
 
14
  # ───────────────────────────────────────────────────────────────────────────
15
  # Chat & RAG Configuration
app/rag/embeddings.py CHANGED
@@ -1,5 +1,5 @@
1
  import os
2
- from langchain_community.embeddings import HuggingFaceBgeEmbeddings
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
4
  from dotenv import load_dotenv
5
 
@@ -40,6 +40,6 @@ login(HF_TOKEN)
40
  model_name = "BAAI/bge-small-en-v1.5"
41
  model_kwargs = {"device": "cpu"}
42
  encode_kwargs = {"normalize_embeddings": True}
43
- embeddings = HuggingFaceBgeEmbeddings(
44
  model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
45
  )
 
1
  import os
2
+ from langchain_huggingface import HuggingFaceEmbeddings
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
4
  from dotenv import load_dotenv
5
 
 
40
  model_name = "BAAI/bge-small-en-v1.5"
41
  model_kwargs = {"device": "cpu"}
42
  encode_kwargs = {"normalize_embeddings": True}
43
+ embeddings = HuggingFaceEmbeddings(
44
  model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
45
  )
app/seo/models.py CHANGED
@@ -1,5 +1,25 @@
1
- from pydantic import BaseModel
2
- from typing import Any, Dict
 
 
 
 
 
3
 
4
  class SEORequest(BaseModel):
 
5
  seo_data: Dict[str, Any]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/seo/models.py
2
+ """
3
+ Pydantic models for SEO requests and recommendations.
4
+ """
5
+ from pydantic import BaseModel, Field
6
+ from typing import Any, Dict, List
7
+
8
 
9
  class SEORequest(BaseModel):
10
+ """Payload for incoming SEO data."""
11
  seo_data: Dict[str, Any]
12
+
13
+
14
+ class PrioritySuggestions(BaseModel):
15
+ """Categorized SEO suggestions by effort level."""
16
+ high: List[str] = Field(..., description="High-effort SEO suggestion strings.")
17
+ medium: List[str] = Field(..., description="Medium-effort SEO suggestion strings.")
18
+ low: List[str] = Field(..., description="Low-effort SEO suggestion strings.")
19
+
20
+
21
+ class Recommendation(BaseModel):
22
+ """Wrapper for prioritized SEO suggestions."""
23
+ priority_suggestions: PrioritySuggestions = Field(
24
+ ..., description="All SEO suggestions categorized by effort level."
25
+ )
app/seo/prompts.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prompt templates for SEO analysis services.
3
+ """
4
+
5
+ class SEOPrompts:
6
+ """
7
+ Container class for SEO-related prompt templates.
8
+ """
9
+
10
+ SYSTEM_PROMPT = """
11
+ You are an **Expert Web Performance Analyst & Optimization Engineer**.
12
+
13
+ Analyze the provided PageSpeed Insights performance report and extract **all** optimization recommendations.
14
+
15
+ Return *only* a JSON object that has a single top-level key, `priority_suggestions`, whose value is an object containing exactly three lists:
16
+ - `"high"`
17
+ - `"medium"`
18
+ - `"low"`
19
+
20
+ Each list item must be a **plain-English sentence**, prefixed with its SEO category tag (e.g. `[On-Page]` or `[Schema]`), and suffixed with `(Effort Level: high|medium|low)`.
21
+
22
+ {format_instructions}
23
+
24
+ Performance Report:
25
+ {report}
26
+
27
+ """
28
+
29
+ Report_PROMPT = """
30
+ You are an **Expert SEO Consultant** with advanced knowledge of on-page, technical, and off-page SEO.
31
+
32
+ Your task is to analyze this data and return a detailed SEO audit report as a **multi-line string** (not as JSON). Keep it structured, clear, and easy to read — for example, using sections, bullet points, and indentation.
33
+
34
+ Include these sections in your output:
35
+
36
+ ---
37
+
38
+ **Overall Summary**
39
+ - Overall SEO Score: (0–100)
40
+ - Grade: A, B, C, D, or F
41
+ - Top Strengths: List the top 3–5 strong areas
42
+ - Top Issues: List the top 3–5 weak/problematic areas
43
+
44
+ ---
45
+
46
+ **Metric Breakdown**
47
+ For each key metric in the data:
48
+ - Metric Name
49
+ - Value: ...
50
+ - Benchmark: ...
51
+ - Score: ...
52
+ - Status: good / needs improvement / critical
53
+ - Why It Matters: Explain simply
54
+ - Recommendation: What to fix or improve
55
+
56
+ ---
57
+
58
+ **Action Plan**
59
+ List 5 weakest metrics and how to fix them:
60
+ - Metric: ...
61
+ - Fix: ...
62
+ - Effort Level: low / medium / high
63
+
64
+ ---
65
+
66
+ **Monitoring Strategy**
67
+ - Frequency: weekly or monthly (based on severity of issues)
68
+ - Methods: Tools or techniques to track progress
69
+
70
+ ---
71
+
72
+ **Technical SEO**
73
+ If data is available, include:
74
+ - Core Web Vitals (LCP, FID, CLS)
75
+ - Page Speed Score
76
+ - Lazy Loading Enabled
77
+ - Security Headers Present
78
+
79
+ If not available, just write "Technical SEO data not available."
80
+
81
+ ---
82
+
83
+ **Schema Markup**
84
+ If available:
85
+ - Types Detected
86
+ - Is Valid: Yes/No
87
+ Else: "Schema markup data not available."
88
+
89
+ ---
90
+
91
+ **Backlink Profile**
92
+ If available:
93
+ - Referring Domains
94
+ - Toxic Links
95
+ - Recommendations to improve off-page SEO
96
+
97
+ ---
98
+
99
+ **Trend Comparison**
100
+ If available:
101
+ - Previous Score
102
+ - Score Change (increase, decrease, or no change)
103
+ - Comment
104
+
105
+ ---
106
+
107
+ ### ⚙️ Scoring Rules Summary (for reference):
108
+
109
+ - SEO Score: ≤50 = critical, 51–70 = needs improvement, >70 = good
110
+ - Meta Title: 50–60 chars = good, else needs improvement
111
+ - H1 Tags: exactly 1 = good, 0 or >1 = needs improvement/critical
112
+ - Heading Errors: any = critical
113
+ - Image Alt Tags: ≥90% = good, 50–89% = needs improvement, <50% = critical
114
+ - sitemapXmlCheck / robotsTxtCheck: missing = critical
115
+ - indexabilityCheck: false = critical
116
+ - internalLinksCount: <5 = needs improvement
117
+ - externalLinksCount: <2 = needs improvement
118
+
119
+ Use these rules to calculate metric status and overall grade:
120
+ - 90–100 → A
121
+ - 80–89 → B
122
+ - 70–79 → C
123
+ - 60–69 → D
124
+ - <60 → F
125
+
126
+ Things to aviod while generating the report
127
+ Don't:
128
+ 1- Do not write anything except the report
129
+ 2- Do not add anything in the start or end of the report
130
+ 3- Do not write text in the start of the report
131
+ 4- Do not write anything like this in the start that here is the report generated etc
132
+
133
+ SEO data provided in JSON format:
134
+ {seo_data}
135
+
136
+ """
app/seo/seo_service.py CHANGED
@@ -1,267 +1,109 @@
1
  """
2
  Business logic services for PageSpeed and SEO analysis.
3
  """
4
- import json
5
- import requests
6
  import logging
7
- import google.generativeai as genai
8
  from typing import Dict, Any
9
  from app.page_speed.config import settings
 
 
10
 
11
- # Create a module-level logger
 
 
 
 
12
  glogger = logging.getLogger(__name__)
13
 
14
  class SEOService:
15
  """
16
- Service class for generating SEO reports via Gemini.
17
  """
18
  def __init__(self):
19
- self.gemini_api_key = settings.gemini_api_key
20
- if self.gemini_api_key:
21
- glogger.info("Configuring Gemini AI for SEO reporting.")
22
- genai.configure(api_key=self.gemini_api_key)
23
- else:
24
- glogger.warning("No Gemini API key found. SEO reporting will fail if called.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def generate_seo_report(self, seo_data: Dict[str, Any]) -> str:
27
  """
28
- Generate an SEO audit report using Gemini AI.
29
 
30
  Args:
31
- seo_data (Dict[str, Any]): Collected SEO metrics in JSON format.
32
 
33
  Returns:
34
- str: JSON-formatted SEO report string
35
 
36
  Raises:
37
  Exception: If report generation fails
38
  """
39
- glogger.info("Starting SEO report generation.")
40
  if not self.gemini_api_key:
41
  msg = "Gemini API key not configured"
42
  glogger.error(msg)
43
  raise Exception(msg)
44
 
45
- prompt = self._create_seo_prompt(seo_data)
46
- glogger.debug("SEO prompt: %s...", prompt[:200])
47
 
48
  try:
49
- model = genai.GenerativeModel("gemini-2.0-flash")
50
- response = model.generate_content(prompt)
51
- text = getattr(response, "text", None)
52
- if not text:
53
- raise Exception("Empty response from Gemini")
54
  glogger.info("SEO report generated successfully.")
55
- return text.strip()
56
  except Exception as e:
57
  msg = f"Error generating SEO report: {e}"
58
  glogger.error(msg, exc_info=True)
59
  raise
60
 
61
- def _create_seo_prompt(self, seo_data: Dict[str, Any]) -> str:
62
- """
63
- Build the advanced prompt for SEO analysis based on the updated specialized template.
64
- """
65
- return f"""
66
- You are an **Expert SEO Consultant** with advanced knowledge of on-page, technical, and off-page SEO.
67
-
68
- Your task is to analyze this data and return a detailed SEO audit report as a **multi-line string** (not as JSON). Keep it structured, clear, and easy to read — for example, using sections, bullet points, and indentation.
69
-
70
- Include these sections in your output:
71
-
72
- ---
73
-
74
- **Overall Summary**
75
- - Overall SEO Score: (0–100)
76
- - Grade: A, B, C, D, or F
77
- - Top Strengths: List the top 3–5 strong areas
78
- - Top Issues: List the top 3–5 weak/problematic areas
79
-
80
- ---
81
-
82
- **Metric Breakdown**
83
- For each key metric in the data:
84
- - Metric Name
85
- - Value: ...
86
- - Benchmark: ...
87
- - Score: ...
88
- - Status: good / needs improvement / critical
89
- - Why It Matters: Explain simply
90
- - Recommendation: What to fix or improve
91
-
92
- ---
93
-
94
- **Action Plan**
95
- List 5 weakest metrics and how to fix them:
96
- - Metric: ...
97
- - Fix: ...
98
- - Effort Level: low / medium / high
99
-
100
- ---
101
-
102
- **Monitoring Strategy**
103
- - Frequency: weekly or monthly (based on severity of issues)
104
- - Methods: Tools or techniques to track progress
105
-
106
- ---
107
-
108
- **Technical SEO**
109
- If data is available, include:
110
- - Core Web Vitals (LCP, FID, CLS)
111
- - Page Speed Score
112
- - Lazy Loading Enabled
113
- - Security Headers Present
114
-
115
- If not available, just write “Technical SEO data not available.”
116
-
117
- ---
118
-
119
- **Schema Markup**
120
- If available:
121
- - Types Detected
122
- - Is Valid: Yes/No
123
- Else: “Schema markup data not available.”
124
-
125
- ---
126
-
127
- **Backlink Profile**
128
- If available:
129
- - Referring Domains
130
- - Toxic Links
131
- - Recommendations to improve off-page SEO
132
-
133
- ---
134
-
135
- **Trend Comparison**
136
- If available:
137
- - Previous Score
138
- - Score Change (increase, decrease, or no change)
139
- - Comment
140
-
141
- ---
142
-
143
- ### ⚙️ Scoring Rules Summary (for reference):
144
-
145
- - SEO Score: ≤50 = critical, 51–70 = needs improvement, >70 = good
146
- - Meta Title: 50–60 chars = good, else needs improvement
147
- - H1 Tags: exactly 1 = good, 0 or >1 = needs improvement/critical
148
- - Heading Errors: any = critical
149
- - Image Alt Tags: ≥90% = good, 50–89% = needs improvement, <50% = critical
150
- - sitemapXmlCheck / robotsTxtCheck: missing = critical
151
- - indexabilityCheck: false = critical
152
- - internalLinksCount: <5 = needs improvement
153
- - externalLinksCount: <2 = needs improvement
154
-
155
- Use these rules to calculate metric status and overall grade:
156
- - 90–100 → A
157
- - 80–89 → B
158
- - 70–79 → C
159
- - 60–69 → D
160
- - <60 → F
161
-
162
- SEO data provided in JSON format:
163
- {seo_data}
164
-
165
- """
166
-
167
- def generate_seo_priority(self, report: str) -> Dict[str, Any]:
168
  """
169
- Generate a dictionary of prioritized performance recommendations based on the Gemini-generated report.
170
 
171
  Args:
172
- report (str): The Gemini-generated performance report
173
 
174
  Returns:
175
- Dict[str, Any]: Dictionary mapping priority levels to optimization suggestions
176
-
177
- Raises:
178
- Exception: If the priority generation fails
179
  """
180
- glogger.info("Generating prioritized suggestions from the Gemini report.")
181
-
182
- if not self.gemini_api_key:
183
- msg = "Gemini API key not configured"
184
- glogger.error(msg)
185
- raise Exception(msg)
186
-
187
  try:
188
- model = genai.GenerativeModel("gemini-2.0-flash")
189
-
190
- prompt = f"""
191
- You are an **Expert Web Performance Analyst & Optimization Engineer**.
192
-
193
- Your task is to carefully analyze the provided PageSpeed Insights performance report.
194
- Extract **all** optimization recommendations and organize them into a JSON object with exactly these keys:
195
- - "high"
196
- - "medium"
197
- - "low"
198
- - "unknown"
199
-
200
- Extract and organize the optimization recommendations from the following performance report
201
- into a JSON object with exactly these keys: \"high\", \"medium\", \"low\", and \"unknown\".
202
- Each key’s value should be a list of suggestion strings.
203
-
204
- Classification Rules:
205
- 1. **Metric Reference:** For each suggestion, cite the metric name and full JSON path
206
- (e.g. `metrics[2].name == "Keyword Density"` or `metrics[6].value`).
207
- 2. **Benchmark Comparison:** Include both the **current value** and the **ideal benchmark**
208
- (e.g. `"Current: 15 keywords, Ideal: 1–3% density"`).
209
- 3. **Impact Estimate:** Quantify expected SEO impact (e.g. `"+12% CTR"` or `"+0.5 page rank score"`).
210
- 4. **Code Snippet:** Provide a ready‑to‑copy example if applicable
211
- (e.g. `<meta name="description" content="...">`).
212
- 5. **Category Tag:** Prefix with SEO domain—
213
- `[On-Page]`, `[Technical]`, `[Off-Page]`, `[Local]`, `[Schema]`.
214
- 6. **Platform Tip:** If applicable, include CMS or framework advice
215
- (e.g. `"WordPress: use Yoast SEO"`, `"Next.js: use next/head"`).
216
- 7. **Priority Classification:**
217
- - **High:** Any metric with score `"critical"` or < 60, or impact ≥ 10%.
218
- - **Medium:** Score 60–79 or impact 5–9%.
219
- - **Low:** Score 80–100 or impact < 5%.
220
- - **Unknown:** No score or impact data available.
221
- 8. Explain in easy english, avoiding technical jargon and explaination for technical terms.
222
-
223
-
224
- Important:
225
- - Respond with *only* a valid JSON object.
226
- - Do NOT include any commentary or explanation outside the JSON.
227
-
228
- Performance Report:
229
- {report}
230
- """
231
-
232
-
233
-
234
- response = model.generate_content(prompt)
235
- raw = (response.text or "").strip()
236
- glogger.debug("Raw priority response: %s", raw[:500] + ("…" if len(raw) > 500 else ""))
237
-
238
- # Locate the JSON portion by finding the first '{' and the last '}'
239
- start = raw.find('{')
240
- end = raw.rfind('}')
241
- if start == -1 or end == -1 or end <= start:
242
- raise ValueError("No JSON object found in Gemini response")
243
-
244
- json_str = raw[start:end+1]
245
- glogger.debug("Extracted JSON string: %s", json_str)
246
-
247
- suggestions = json.loads(json_str)
248
- if not isinstance(suggestions, dict):
249
- raise ValueError("Parsed JSON is not a dictionary")
250
-
251
- # Ensure all expected keys exist
252
- for key in ("high", "medium", "low", "unknown"):
253
- suggestions.setdefault(key, [])
254
-
255
- glogger.info("Priority suggestions generated successfully.")
256
- return suggestions
257
-
258
- except json.JSONDecodeError as je:
259
- msg = f"Failed to parse JSON from Gemini response: {je}"
260
- glogger.error(msg, exc_info=True)
261
- raise Exception(msg)
262
  except Exception as e:
263
  msg = f"Error generating priority suggestions: {e}"
264
  glogger.error(msg, exc_info=True)
265
  raise
266
-
267
-
 
1
  """
2
  Business logic services for PageSpeed and SEO analysis.
3
  """
4
+ import os
5
+ import getpass
6
  import logging
 
7
  from typing import Dict, Any
8
  from app.page_speed.config import settings
9
+ from app.seo.models import Recommendation, PrioritySuggestions
10
+ from app.seo.prompts import SEOPrompts
11
 
12
+ from langchain_google_genai import ChatGoogleGenerativeAI
13
+ from langchain_core.prompts import ChatPromptTemplate
14
+ from langchain_core.output_parsers import PydanticOutputParser
15
+
16
+ # Module-level logger
17
  glogger = logging.getLogger(__name__)
18
 
19
  class SEOService:
20
  """
21
+ Service class for generating SEO reports and prioritized suggestions via Gemini.
22
  """
23
  def __init__(self):
24
+ # configure Gemini key
25
+ key = settings.gemini_api_key or os.getenv("GEMINI_API_KEY")
26
+ if not key:
27
+ key = getpass.getpass("Enter your Gemini API key: ")
28
+ self.gemini_api_key = key
29
+
30
+ # initialize LangChain LLM wrapper
31
+ self.llm = ChatGoogleGenerativeAI(
32
+ model="gemini-2.5-flash",
33
+ temperature=0,
34
+ max_tokens=None,
35
+ timeout=None,
36
+ max_retries=3,
37
+ api_key=self.gemini_api_key
38
+ )
39
+
40
+ # Prompt template for raw SEO report
41
+ self.report_prompt = ChatPromptTemplate.from_messages([
42
+ ("system", SEOPrompts.Report_PROMPT),
43
+ ("human", "Please generate a comprehensive SEO audit report based on the following data:\n\n{seo_data}")
44
+ ])
45
+
46
+ # Prompt + parser for prioritized suggestions
47
+ self.parser = PydanticOutputParser(pydantic_object=Recommendation)
48
+ self.priority_chain = (
49
+ ChatPromptTemplate.from_messages([
50
+ ("system", SEOPrompts.SYSTEM_PROMPT),
51
+ ("human", "{report}")
52
+ ]).partial(format_instructions=self.parser.get_format_instructions())
53
+ | self.llm
54
+ | self.parser
55
+ )
56
 
57
  def generate_seo_report(self, seo_data: Dict[str, Any]) -> str:
58
  """
59
+ Generate an SEO audit report using Gemini AI via llm.invoke.
60
 
61
  Args:
62
+ seo_data (Dict[str, Any]): Collected SEO metrics in JSON-serializable format.
63
 
64
  Returns:
65
+ str: Raw text SEO report
66
 
67
  Raises:
68
  Exception: If report generation fails
69
  """
70
+ glogger.info("Starting SEO report generation via llm.invoke.")
71
  if not self.gemini_api_key:
72
  msg = "Gemini API key not configured"
73
  glogger.error(msg)
74
  raise Exception(msg)
75
 
76
+ prompt_input = {"seo_data": seo_data}
77
+ glogger.debug("Invoking LLM for SEO report with data keys: %s", list(seo_data.keys()))
78
 
79
  try:
80
+ # Pipe the prompt into the LLM; invoking the chain returns a message object whose text is read from .content below
81
+ report_text: str = self.report_prompt | self.llm
82
+ report = report_text.invoke(prompt_input)
83
+ if not report:
84
+ raise Exception("Empty response from Gemini via llm.invoke")
85
  glogger.info("SEO report generated successfully.")
86
+ return report.content.strip()
87
  except Exception as e:
88
  msg = f"Error generating SEO report: {e}"
89
  glogger.error(msg, exc_info=True)
90
  raise
91
 
92
+ def generate_seo_priority(self, report: str) -> PrioritySuggestions:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  """
94
+ Generate prioritized SEO suggestions from a report via chain.invoke.
95
 
96
  Args:
97
+ report (str): SEO report content
98
 
99
  Returns:
100
+ PrioritySuggestions: Parsed, prioritized recommendations
 
 
 
101
  """
102
+ glogger.info("Generating prioritized SEO suggestions via chain.invoke.")
 
 
 
 
 
 
103
  try:
104
+ rec: Recommendation = self.priority_chain.invoke({"report": report})
105
+ return rec.priority_suggestions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  except Exception as e:
107
  msg = f"Error generating priority suggestions: {e}"
108
  glogger.error(msg, exc_info=True)
109
  raise
 
 
requirements.txt CHANGED
@@ -1,9 +1,9 @@
1
- fastapi==0.104.1
2
- uvicorn==0.24.0
3
- python-dotenv==1.0.0
4
- requests==2.31.0
5
- google-generativeai==0.3.2
6
- pydantic==2.5.0
7
  pydantic_settings
8
  langchain_groq
9
  langchain_community
@@ -11,5 +11,6 @@ faiss-cpu
11
  pymongo
12
  langchain-mongodb
13
  huggingface_hub
14
- python_dotenv
15
  sentence_transformers
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-dotenv
4
+ requests
5
+ google-generativeai
6
+ pydantic
7
  pydantic_settings
8
  langchain_groq
9
  langchain_community
 
11
  pymongo
12
  langchain-mongodb
13
  huggingface_hub
 
14
  sentence_transformers
15
+ langchain_google_genai
16
+ langchain_huggingface