Upload 10 files
Browse files- Dockerfile +28 -0
- README.md +29 -4
- agents/backlink_indexing.py +79 -0
- agents/competitor_intelligence.py +72 -0
- agents/content_optimizer.py +80 -0
- agents/orchestrator.py +45 -0
- agents/performance_analytics.py +113 -0
- agents/technical_auditor.py +75 -0
- main.py +142 -0
- requirements.txt +21 -0
Dockerfile
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Container image for the SEO Multi-Agent System.
# Serves the FastAPI app with uvicorn on port 7860 (the default app port
# for Hugging Face Spaces using the Docker SDK).
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
# build-essential: compile native wheels during pip install;
# curl: debugging / health checks; git: pip installs from VCS if needed.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements
# Copied before the application code so the dependency layer is cached
# across code-only rebuilds.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Download Hugging Face models during build (cached in image)
# Pre-fetching here avoids a long cold start and network dependency at
# container boot. Separate RUN layers so one failed download does not
# invalidate the others in the build cache.
RUN python -c "from transformers import pipeline; pipeline('token-classification', model='ml6team/keyphrase-extraction-distilbert-inspec')"
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
RUN python -c "from transformers import BartForConditionalGeneration, BartTokenizer; BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn'); BartTokenizer.from_pretrained('facebook/bart-large-cnn')"

# Copy application code
COPY . .

# Expose port (default for HF Spaces)
EXPOSE 7860

# Run FastAPI server
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,10 +1,35 @@
|
|
| 1 |
---
|
| 2 |
-
title: SEO
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: blue
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: SEO Multi-Agent System
|
| 3 |
+
emoji: 🚀
|
| 4 |
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# SEO Multi-Agent System
|
| 12 |
+
|
| 13 |
+
Autonomous 24/7 SEO optimization with zero manual intervention.
|
| 14 |
+
|
| 15 |
+
## Features
|
| 16 |
+
|
| 17 |
+
- **Technical Auditing**: Automated crawl and quality checks.
|
| 18 |
+
- **Content Optimization**: AI-driven keyword and meta tag generation.
|
| 19 |
+
- **Competitor Intelligence**: SERP analysis (via Playwright).
|
| 20 |
+
- **Automated Indexing**: Google Search Console & IndexNow submission.
|
| 21 |
+
- **Performance Analytics**: GA4 & GSC monitoring.
|
| 22 |
+
|
| 23 |
+
## Configuration
|
| 24 |
+
|
| 25 |
+
This system is designed to be triggered by a Cloudflare Worker request.
|
| 26 |
+
|
| 27 |
+
### Environment Variables (Secrets)
|
| 28 |
+
Set these in your Space settings:
|
| 29 |
+
- `GSC_CREDENTIALS_JSON`: Content of your Google Search Console JSON key.
|
| 30 |
+
- `GA4_CREDENTIALS_JSON`: Content of your GA4 JSON key.
|
| 31 |
+
- `INDEXNOW_KEY`: Your IndexNow API key.
|
| 32 |
+
|
| 33 |
+
## Deployment
|
| 34 |
+
|
| 35 |
+
This Space runs automatically on Hugging Face infrastructure using the Docker SDK.
|
agents/backlink_indexing.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from google.oauth2 import service_account
|
| 2 |
+
from googleapiclient.discovery import build
|
| 3 |
+
import requests
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import xml.etree.ElementTree as ET
|
| 7 |
+
from datetime import datetime, timedelta
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
class BacklinkIndexingAgent:
    """Submits new/updated URLs to search engines via IndexNow and the
    Google Search Console (GSC) API.

    The GSC client is optional: if the credentials file is missing or
    fails to load, ``self.gsc`` stays ``None`` and GSC-backed methods
    degrade to no-ops instead of raising.
    """

    def __init__(self, gsc_credentials_path, site_url):
        # Base URL of the managed site, e.g. "https://example.com".
        self.site_url = site_url
        self.gsc = None

        if os.path.exists(gsc_credentials_path):
            try:
                credentials = service_account.Credentials.from_service_account_file(
                    gsc_credentials_path,
                    scopes=['https://www.googleapis.com/auth/webmasters']
                )
                self.gsc = build('searchconsole', 'v1', credentials=credentials)
            except Exception as e:
                logger.error(f"Failed to initialize GSC client: {e}")
        else:
            logger.warning(f"GSC credentials not found at {gsc_credentials_path}")

    def submit_to_indexnow(self, urls, api_key="your-key", key_location=""):
        """POST a batch of URLs to the IndexNow endpoint.

        Returns True when the API acknowledges the submission.
        FIX: IndexNow signals success with 200 (OK) *or* 202 (Accepted);
        the previous ``== 200`` check reported valid submissions as failed.
        """
        try:
            endpoint = "https://api.indexnow.org/IndexNow"
            host = self.site_url.replace('https://', '').replace('http://', '')
            payload = {
                "host": host,
                "key": api_key,
                "keyLocation": key_location or f"{self.site_url}/indexnow-key.txt",
                "urlList": urls
            }
            response = requests.post(endpoint, json=payload, timeout=10)
            return response.status_code in (200, 202)
        except Exception as e:
            logger.error(f"IndexNow submission failed: {e}")
            return False

    def submit_to_google(self, url):
        """Run a GSC URL Inspection for ``url``; returns True on success.

        FIX: ``urlInspection().index().inspect`` takes ``inspectionUrl``
        and ``siteUrl`` inside the request body. The old call passed
        ``siteUrl=`` as a method kwarg plus an Indexing-API-shaped body
        ({'url', 'type'}), which the Search Console API rejects.
        NOTE(review): URL Inspection *reports* index status; it does not
        request (re)indexing — that requires the separate Indexing API.
        Confirm which behavior callers expect.
        """
        if not self.gsc:
            return False
        try:
            body = {'inspectionUrl': url, 'siteUrl': self.site_url}
            self.gsc.urlInspection().index().inspect(body=body).execute()
            return True
        except Exception as e:
            logger.error(f"GSC submission failed for {url}: {e}")
            return False

    def auto_submit_new_pages(self, new_urls):
        """Submit ``new_urls`` to all engines; returns per-engine results."""
        results = {'indexnow': False, 'google': []}

        # IndexNow accepts the whole batch in a single call.
        if new_urls:
            results['indexnow'] = self.submit_to_indexnow(new_urls)

        # Google (Limit 200/day normally) — cap at 10 per run to stay
        # well under quota.
        for url in new_urls[:10]:
            # Relative paths are resolved against the configured site URL.
            full_url = url if url.startswith('http') else f"{self.site_url}{url}"
            success = self.submit_to_google(full_url)
            results['google'].append({'url': url, 'success': success})

        return results

    def check_indexing_status(self):
        """Placeholder: a real implementation would query searchAnalytics
        or urlInspection; currently always returns an empty list."""
        if not self.gsc:
            return []
        return []

    def get_indexing_errors(self):
        """Placeholder: no indexing-error retrieval implemented yet."""
        return []
|
agents/competitor_intelligence.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from playwright.sync_api import sync_playwright
|
| 2 |
+
from sentence_transformers import SentenceTransformer, util
|
| 3 |
+
import time
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
class CompetitorIntelligenceAgent:
    """Gathers competitor signals: scrapes Google SERPs and measures the
    semantic overlap between our content and competitor content.

    The sentence-transformer model is loaded once at construction; if
    loading fails the gap-analysis method returns an empty result.
    """

    def __init__(self):
        try:
            self.semantic_model = SentenceTransformer('all-mpnet-base-v2')
        except Exception as e:
            logger.error(f"Failed to load semantic model: {e}")

    def scrape_serp(self, keyword):
        """Return up to five non-Google result URLs for ``keyword``."""
        found = []
        try:
            with sync_playwright() as pw:
                chromium = pw.chromium.launch(headless=True)
                tab = chromium.new_page()

                # Search Google (Note: frequent scraping might get blocked)
                tab.goto(f"https://www.google.com/search?q={keyword}&num=10")
                time.sleep(2)  # Be polite

                # Extract results (Selectors might need maintenance)
                for anchor in tab.locator('div.g a').all():
                    target = anchor.get_attribute('href')
                    if target and 'google' not in target:
                        found.append(target)
                        if len(found) >= 5:
                            break

                chromium.close()
        except Exception as e:
            logger.error(f"SERP scraping failed: {e}")

        return found

    def analyze_content_gaps(self, your_content, competitor_contents):
        """Flag competitors whose content diverges from ours.

        A competitor counts as a "gap" when cosine similarity against our
        content falls below 0.6. Returns an empty list when the semantic
        model failed to load.
        """
        if not hasattr(self, 'semantic_model'):
            return []

        own_vec = self.semantic_model.encode(your_content, convert_to_tensor=True)
        rival_vecs = self.semantic_model.encode(competitor_contents, convert_to_tensor=True)
        scores = util.cos_sim(own_vec, rival_vecs)

        return [
            {
                "competitor_idx": pos,
                "similarity": float(score),
                "note": "Topic gap detected",
            }
            for pos, score in enumerate(scores[0])
            if score < 0.6
        ]

    def generate_competitive_report(self, keyword):
        """Produce a basic competitive report for ``keyword``."""
        logger.info(f"Generating report for keyword: {keyword}")

        competitor_urls = self.scrape_serp(keyword)
        competitor_contents = []

        # In a real run, we would visit each URL to get content
        # For this skeleton, we'll placeholder

        return {
            "keyword": keyword,
            "top_competitors": competitor_urls,
            "analysis_status": "completed_basic",
            "message": "Deep content analysis requires full scraping infrastructure which is rate-limit sensitive."
        }
|
agents/content_optimizer.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline, BartForConditionalGeneration, BartTokenizer, T5ForConditionalGeneration, T5Tokenizer
|
| 2 |
+
from sentence_transformers import SentenceTransformer, util
|
| 3 |
+
import requests
|
| 4 |
+
from bs4 import BeautifulSoup
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger(__name__)
|
| 8 |
+
|
| 9 |
+
class ContentOptimizationAgent:
    """Generates SEO metadata (keywords, meta descriptions, titles) with
    locally hosted Hugging Face models.

    All models load once in ``__init__``. On load failure the generation
    methods detect the missing attributes and return empty results rather
    than raising.
    """

    def __init__(self):
        try:
            self.keyword_extractor = pipeline(
                "token-classification",
                model="ml6team/keyphrase-extraction-distilbert-inspec",
                aggregation_strategy="simple"
            )
            self.meta_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
            self.meta_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
            self.semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

            # Load T5 only if needed to save memory, or here if sufficient RAM (Space has 16GB)
            self.title_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
            self.title_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
        except Exception as e:
            logger.error(f"Failed to load models for Content Optimizer: {e}")

    def extract_keywords(self, text):
        """Return unique keyphrases (score > 0.7) from the first 512
        characters of ``text`` (truncated for speed)."""
        if not hasattr(self, 'keyword_extractor'):
            return []
        results = self.keyword_extractor(text[:512])  # Limit for speed
        keywords = [r['word'] for r in results if r['score'] > 0.7]
        return list(set(keywords))

    def generate_meta_description(self, text, max_length=160):
        """Summarize ``text`` into a candidate meta description.

        NOTE(review): ``max_length``/``min_length`` are measured in tokens,
        not characters, so the output can exceed the ~160-character SERP
        limit — confirm whether a post-hoc trim is wanted.
        """
        if not hasattr(self, 'meta_model'):
            return ""
        inputs = self.meta_tokenizer(text[:1024], return_tensors="pt", truncation=True)
        summary_ids = self.meta_model.generate(
            inputs["input_ids"],
            max_length=max_length,
            min_length=100,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
        # FIX: generate() returns a batch (2-D tensor of shape
        # [num_sequences, seq_len]); decode the first sequence rather than
        # passing the whole batch to tokenizer.decode().
        return self.meta_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    def optimize_title(self, content, keyword):
        """Generate an SEO page title (under 60 characters requested in
        the prompt) focused on ``keyword``."""
        if not hasattr(self, 'title_model'):
            return ""
        prompt = f"Write an SEO-optimized page title under 60 characters for content about '{keyword}': {content[:500]}"
        inputs = self.title_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        outputs = self.title_model.generate(inputs["input_ids"], max_length=60, num_beams=5)
        # FIX: decode the first generated sequence — `outputs` is batched.
        return self.title_tokenizer.decode(outputs[0], skip_special_tokens=True)

    def analyze_page(self, url):
        """Fetch ``url`` and produce keyword/meta/title suggestions.

        Returns a result dict on success or ``{"error": ...}`` on failure.
        """
        logger.info(f"Optimizing content for: {url}")
        try:
            response = requests.get(url, timeout=10)
            soup = BeautifulSoup(response.text, 'html.parser')
            content = soup.get_text(separator=' ', strip=True)

            keywords = self.extract_keywords(content)
            suggested_meta = self.generate_meta_description(content)

            # Detect main keyword (simple frequency for now, or use first extracted)
            main_keyword = keywords[0] if keywords else "general"
            suggested_title = self.optimize_title(content, main_keyword)

            recommendations = []
            if len(keywords) < 3:
                recommendations.append("Content might be too thin; few keywords detected.")

            return {
                "url": url,
                "extracted_keywords": keywords,
                "suggested_title": suggested_title,
                "suggested_meta_description": suggested_meta,
                "recommendations": recommendations
            }
        except Exception as e:
            logger.error(f"Content optimization failed: {e}")
            return {"error": str(e)}
|
agents/orchestrator.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import redis
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
class OrchestratorAgent:
    """Coordinates agent workflows and persists workflow state in Redis.

    Redis is optional: if the connection cannot be established at
    construction time, state persistence is skipped silently and health
    reporting flags the link as disconnected.
    """

    def __init__(self, redis_host='localhost', redis_port=6379, agents=None):
        self.agents = agents or {}
        try:
            conn = redis.Redis(host=redis_host, port=redis_port, decode_responses=True)
            # Fail fast if the server is unreachable.
            conn.ping()
            self.redis = conn
        except Exception as e:
            logger.warning(f"Redis connection failed (Orchestrator): {e}")
            self.redis = None

    def create_workflow(self, workflow_name, payload):
        """Register a new workflow and return its generated id."""
        workflow_id = f"workflow_{workflow_name}_{datetime.now().timestamp()}"
        logger.info(f"Creating workflow {workflow_id}")

        # In a real system, you'd decompose this into steps and enqueue them
        # For this prototype, we just log it and maybe store state
        if self.redis:
            state = {
                "name": workflow_name,
                "payload": payload,
                "status": "created",
                "created_at": datetime.now().isoformat(),
            }
            try:
                self.redis.set(workflow_id, json.dumps(state))
                self.redis.expire(workflow_id, 86400)  # 1 day expiry
            except Exception as e:
                logger.error(f"Failed to save workflow state: {e}")

        return workflow_id

    def monitor_agent_health(self):
        """Report coarse liveness of the orchestrator and its Redis link."""
        redis_state = "connected" if self.redis else "disconnected"
        return {
            "orchestrator": "healthy",
            "redis": redis_state
        }
|
agents/performance_analytics.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from google.analytics.data_v1beta import BetaAnalyticsDataClient
|
| 2 |
+
from google.analytics.data_v1beta.types import RunReportRequest, DateRange, Metric, Dimension
|
| 3 |
+
from google.oauth2 import service_account
|
| 4 |
+
from googleapiclient.discovery import build
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from datetime import datetime, timedelta
|
| 7 |
+
import requests
|
| 8 |
+
import logging
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
class PerformanceAnalyticsAgent:
    """Pulls performance data from GA4, Google Search Console (GSC) and
    the public PageSpeed Insights API.

    Both Google clients are optional: when a credentials file is missing
    or fails to load, the corresponding client stays ``None`` and the
    reporting methods return empty/default results instead of raising.
    """

    def __init__(self, ga4_property_id, ga4_credentials_path, gsc_credentials_path, site_url):
        self.property_id = ga4_property_id
        self.site_url = site_url
        self.ga4_client = None
        self.gsc = None

        if os.path.exists(ga4_credentials_path):
            try:
                # FIX: service-account credentials need explicit scopes;
                # without them requests to the Analytics Data API are
                # rejected at call time.
                creds = service_account.Credentials.from_service_account_file(
                    ga4_credentials_path,
                    scopes=['https://www.googleapis.com/auth/analytics.readonly']
                )
                self.ga4_client = BetaAnalyticsDataClient(credentials=creds)
            except Exception as e:
                logger.error(f"GA4 init failed: {e}")

        if os.path.exists(gsc_credentials_path):
            try:
                # FIX: same scope requirement for Search Console (this also
                # matches BacklinkIndexingAgent's initialization).
                creds = service_account.Credentials.from_service_account_file(
                    gsc_credentials_path,
                    scopes=['https://www.googleapis.com/auth/webmasters']
                )
                self.gsc = build('searchconsole', 'v1', credentials=creds)
            except Exception as e:
                logger.error(f"GSC init failed: {e}")

    def get_underperforming_pages(self, days=30):
        """Return pages with high impressions (>100) but low CTR (<2%)
        over the last ``days`` days, or [] when GSC is unavailable."""
        if not self.gsc:
            return []
        try:
            response = self.gsc.searchanalytics().query(
                siteUrl=self.site_url,
                body={
                    'startDate': (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d'),
                    'endDate': datetime.now().strftime('%Y-%m-%d'),
                    'dimensions': ['page'],
                    'rowLimit': 100
                }
            ).execute()

            underperforming = []
            for row in response.get('rows', []):
                ctr = row['ctr']
                # Heuristic thresholds: plenty of visibility, few clicks.
                if row['impressions'] > 100 and ctr < 0.02:
                    underperforming.append({
                        'url': row['keys'][0],
                        'impressions': row['impressions'],
                        'ctr': ctr
                    })
            return underperforming
        except Exception as e:
            logger.error(f"GSC query failed: {e}")
            return []

    def detect_algorithm_update(self):
        """Simplified volatility check (static placeholder).

        In production this would compare day-over-day ranking
        distributions to flag possible search-algorithm updates.
        """
        return {
            'volatility_detected': False,
            'avg_position_change': 0.0
        }

    def generate_weekly_report(self):
        """Build a 7-day traffic report from GA4 page-view data.

        Always returns the report skeleton; GA4 data is filled in only
        when the client is available and the query succeeds.
        """
        report = {
            'summary': {'total_pageviews': 0},
            'top_pages': [],
            'underperforming_pages': []
        }

        if self.ga4_client:
            try:
                request = RunReportRequest(
                    property=f"properties/{self.property_id}",
                    date_ranges=[DateRange(start_date="7daysAgo", end_date="today")],
                    dimensions=[Dimension(name="pagePath")],
                    metrics=[Metric(name="screenPageViews")]
                )
                response = self.ga4_client.run_report(request)

                total_pv = 0
                for row in response.rows:
                    pv = int(row.metric_values[0].value)
                    total_pv += pv
                    report['top_pages'].append({
                        'path': row.dimension_values[0].value,
                        'views': pv
                    })
                report['summary']['total_pageviews'] = total_pv
            except Exception as e:
                logger.error(f"GA4 report failed: {e}")

        return report

    def monitor_core_web_vitals(self, url):
        """Query the public PageSpeed Insights API for ``url`` (mobile
        strategy) and return the Lighthouse performance score."""
        try:
            psi_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
            params = {'url': url, 'strategy': 'mobile', 'category': ['performance', 'seo']}
            # FIX: the original call had no timeout; PSI audits can take a
            # long time, and a missing timeout can hang the worker forever.
            resp = requests.get(psi_url, params=params, timeout=60)
            data = resp.json()

            lighthouse = data.get('lighthouseResult', {})
            return {
                'url': url,
                'performance_score': lighthouse.get('categories', {}).get('performance', {}).get('score')
            }
        except Exception as e:
            logger.error(f"CWV check failed: {e}")
            return {'error': str(e)}
|
agents/technical_auditor.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline
|
| 2 |
+
import requests
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
class TechnicalAuditorAgent:
    """Audits a single page: ML-assessed content quality, images missing
    alt text, and broken outbound links.

    The zero-shot classifier is optional; when it fails to load the
    quality check returns an error marker instead of raising.
    """

    def __init__(self):
        try:
            self.classifier = pipeline(
                "zero-shot-classification",
                model="facebook/bart-large-mnli",
                device=-1  # CPU inference
            )
        except Exception as e:
            logger.warning(f"Could not load ML models for Technical Auditor: {e}")
            self.classifier = None

    def assess_content_quality(self, page_text):
        """Zero-shot classify page text against SEO quality labels.

        Returns the candidate labels ordered most- to least-likely, or an
        error marker list when the classifier is unavailable.
        """
        if not self.classifier:
            return ["error-model-not-loaded"]

        result = self.classifier(
            page_text[:1024],  # Limit text length for speed
            candidate_labels=["high-quality", "thin-content", "spam", "keyword-stuffing"]
        )
        return result['labels']

    def audit_page(self, url):
        """Audit ``url`` and return a results dict (always returned, even
        on partial failure)."""
        logger.info(f"Auditing page: {url}")
        results = {
            "url": url,
            "broken_links": [],
            "missing_alt": [],
            "content_quality": "unknown"
        }

        try:
            # Note: In a real scenario, you might need to handle headers/user-agents
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                logger.error(f"Page returned status {response.status_code}")
                return results

            soup = BeautifulSoup(response.text, 'html.parser')

            # Check content quality
            text_content = soup.get_text(separator=' ', strip=True)
            results['content_quality'] = self.assess_content_quality(text_content)

            # Check images
            for img in soup.find_all('img'):
                if not img.get('alt'):
                    results['missing_alt'].append(img.get('src'))

            # Check links (simplified). Deduplicate hrefs so each external
            # URL is requested at most once per audit.
            checked = set()
            for link in soup.find_all('a'):
                href = link.get('href')
                if href and href.startswith('http') and href not in checked:
                    checked.add(href)
                    try:
                        head = requests.head(href, timeout=5)
                        if head.status_code >= 400:
                            results['broken_links'].append(href)
                    # FIX: was a bare `except:` which also swallows
                    # KeyboardInterrupt/SystemExit; catch network errors only.
                    except requests.RequestException:
                        results['broken_links'].append(href)

        except Exception as e:
            logger.error(f"Audit failed for {url}: {e}")

        logger.info(f"Audit complete for {url}: {results}")
        # In a real system, you would post these results back to a webhook or database
        return results
|
main.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI entry point for the SEO Multi-Agent System.

Wires all agents together and exposes them as HTTP endpoints; designed
to be triggered by an external scheduler (e.g. a Cloudflare Worker).
"""
from fastapi import FastAPI, BackgroundTasks, HTTPException, Request
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
import logging
import os
import json

# Import agents (we will create these files next)
from agents.technical_auditor import TechnicalAuditorAgent
from agents.content_optimizer import ContentOptimizationAgent
from agents.competitor_intelligence import CompetitorIntelligenceAgent
from agents.backlink_indexing import BacklinkIndexingAgent
from agents.performance_analytics import PerformanceAnalyticsAgent
from agents.orchestrator import OrchestratorAgent

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="SEO Multi-Agent System", version="1.0.0")

# --- CONFIGURATION ---
# IMPROVEMENT: deployment-specific values come from environment variables
# (matching the README's "Environment Variables" section), with the old
# hard-coded placeholders kept as defaults for backward compatibility.
SITE_URL = os.getenv("SITE_URL", "https://yoursite.pages.dev")
GA4_PROPERTY_ID = os.getenv("GA4_PROPERTY_ID", "your-property-id")
GSC_CREDENTIALS_PATH = os.getenv("GSC_CREDENTIALS_PATH", "/app/credentials/gsc-credentials.json")
GA4_CREDENTIALS_PATH = os.getenv("GA4_CREDENTIALS_PATH", "/app/credentials/ga4-credentials.json")
REDIS_HOST = os.getenv("REDIS_HOST", "redis")  # service name in docker-compose or similar
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))

# Initialize agents
# Note: In a real deployment, credentials would be loaded from env vars or mounted secrets
technical_agent = TechnicalAuditorAgent()
content_agent = ContentOptimizationAgent()
competitor_agent = CompetitorIntelligenceAgent()
indexing_agent = BacklinkIndexingAgent(
    gsc_credentials_path=GSC_CREDENTIALS_PATH,
    site_url=SITE_URL
)
performance_agent = PerformanceAnalyticsAgent(
    ga4_property_id=GA4_PROPERTY_ID,
    ga4_credentials_path=GA4_CREDENTIALS_PATH,
    gsc_credentials_path=GSC_CREDENTIALS_PATH,
    site_url=SITE_URL
)
orchestrator = OrchestratorAgent(redis_host=REDIS_HOST, redis_port=REDIS_PORT, agents={})

# --- MODELS ---

class PageAuditRequest(BaseModel):
    url: str
    checks: Optional[List[str]] = ["all"]

class ContentOptimizeRequest(BaseModel):
    url: str
    tasks: Optional[List[str]] = ["all"]

class CompetitorAnalysisRequest(BaseModel):
    keyword: str

class IndexingRequest(BaseModel):
    urls: List[str]

class WorkflowTrigger(BaseModel):
    workflow_name: str
    payload: Dict[str, Any]

# --- ENDPOINTS ---

@app.get("/")
async def root():
    """Service banner: lists the available agent categories."""
    return {
        "service": "SEO Multi-Agent System",
        "status": "running",
        "agents": ["technical", "content", "competitor", "indexing", "performance"]
    }

@app.get("/health")
async def health_check():
    # Basic check, can expand to check agents
    return {"status": "healthy", "orchestrator": "online"}

# TECHNICAL AUDITOR
@app.post("/audit/execute")
async def execute_audit(request: PageAuditRequest, background_tasks: BackgroundTasks):
    """Kick off a page audit in the background; returns immediately."""
    background_tasks.add_task(technical_agent.audit_page, request.url)
    return {"status": "started", "url": request.url}

# CONTENT OPTIMIZER
@app.post("/optimize")
async def optimize_content(request: ContentOptimizeRequest):
    """Synchronously analyze a page and return SEO suggestions."""
    result = content_agent.analyze_page(request.url)
    return result

# COMPETITOR INTELLIGENCE
@app.post("/competitor/analyze")
async def analyze_competitor(request: CompetitorAnalysisRequest, background_tasks: BackgroundTasks):
    """Kick off SERP/competitor analysis in the background."""
    background_tasks.add_task(competitor_agent.generate_competitive_report, request.keyword)
    return {"status": "started", "keyword": request.keyword}

# INDEXING
@app.post("/index/submit")
async def submit_indexing(request: IndexingRequest):
    """Submit URLs to IndexNow and Google; returns per-engine results."""
    results = indexing_agent.auto_submit_new_pages(request.urls)
    return results

@app.get("/index/status")
async def indexing_status():
    """Report current indexing status and any known errors."""
    status = indexing_agent.check_indexing_status()
    errors = indexing_agent.get_indexing_errors()
    return {"indexed_pages": status, "errors": errors}

# PERFORMANCE & ANALYTICS
@app.get("/analytics/underperforming")
async def get_underperforming():
    """List pages with high impressions but low CTR (last 30 days)."""
    try:
        pages = performance_agent.get_underperforming_pages(days=30)
        return pages
    except Exception as e:
        logger.error(f"Error getting underperforming pages: {e}")
        return []

@app.get("/report/weekly")
async def weekly_report():
    """Return the 7-day GA4 traffic report."""
    try:
        report = performance_agent.generate_weekly_report()
        return report
    except Exception as e:
        logger.error(f"Error generating weekly report: {e}")
        return {"error": str(e)}

@app.get("/monitor/algorithm")
async def check_algorithm():
    """Check for search-algorithm volatility (placeholder heuristic)."""
    try:
        status = performance_agent.detect_algorithm_update()
        return status
    except Exception as e:
        return {"volatility_detected": False, "error": str(e)}

@app.get("/monitor/cwv")
async def check_cwv(url: str):
    """Query PageSpeed Insights for Core Web Vitals of `url`."""
    result = performance_agent.monitor_core_web_vitals(url)
    return result

# ORCHESTRATOR
@app.post("/workflow/trigger")
async def trigger_workflow(request: WorkflowTrigger):
    """Create a named workflow with the given payload."""
    workflow_id = orchestrator.create_workflow(
        request.workflow_name,
        request.payload
    )
    return {"workflow_id": workflow_id, "status": "started"}
|
requirements.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.109.0
|
| 2 |
+
uvicorn[standard]==0.27.0
|
| 3 |
+
transformers==4.36.0
|
| 4 |
+
torch==2.1.0
|
| 5 |
+
sentence-transformers==2.3.1
|
| 6 |
+
beautifulsoup4==4.12.3
|
| 7 |
+
scrapy==2.11.0
|
| 8 |
+
playwright==1.41.0
|
| 9 |
+
pandas==2.2.0
|
| 10 |
+
numpy==1.26.3
|
| 11 |
+
scikit-learn==1.4.0
|
| 12 |
+
redis==5.0.1
|
| 13 |
+
psycopg2-binary==2.9.9
|
| 14 |
+
google-api-python-client==2.116.0
|
| 15 |
+
google-auth==2.27.0
|
| 16 |
+
google-analytics-data==0.18.5
|
| 17 |
+
apscheduler==3.10.4
|
| 18 |
+
requests==2.31.0
|
| 19 |
+
pillow==10.2.0
|
| 20 |
+
lxml==5.1.0
|
| 21 |
+
keybert==0.8.4
|