pkm13 commited on
Commit
e5ab217
·
verified ·
1 Parent(s): 24b8867

Upload 10 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the SEO Multi-Agent FastAPI service (HF Spaces, Docker SDK).
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies needed to build Python wheels and fetch sources
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached across code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install the Chromium browser (plus its OS dependencies) that Playwright
# drives in agents/competitor_intelligence.py — `pip install playwright`
# alone ships no browser binary, so SERP scraping would fail at runtime.
RUN playwright install --with-deps chromium

# Download Hugging Face models during build (cached in image)
# NOTE(review): google/flan-t5-large used by the content optimizer is NOT
# pre-downloaded here and will be pulled on first start — confirm intended.
RUN python -c "from transformers import pipeline; pipeline('token-classification', model='ml6team/keyphrase-extraction-distilbert-inspec')"
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
RUN python -c "from transformers import BartForConditionalGeneration, BartTokenizer; BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn'); BartTokenizer.from_pretrained('facebook/bart-large-cnn')"

# Copy application code
COPY . .

# Expose port (default for HF Spaces)
EXPOSE 7860

# Run FastAPI server
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,35 @@
1
  ---
2
- title: SEO
3
- emoji: 🐢
4
  colorFrom: blue
5
- colorTo: purple
6
  sdk: docker
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: SEO Multi-Agent System
3
+ emoji: 🚀
4
  colorFrom: blue
5
+ colorTo: green
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
+ # SEO Multi-Agent System
12
+
13
+ Autonomous 24/7 SEO optimization with zero manual intervention.
14
+
15
+ ## Features
16
+
17
+ - **Technical Auditing**: Automated crawl and quality checks.
18
+ - **Content Optimization**: AI-driven keyword and meta tag generation.
19
+ - **Competitor Intelligence**: SERP analysis (via Playwright).
20
+ - **Automated Indexing**: Google Search Console & IndexNow submission.
21
+ - **Performance Analytics**: GA4 & GSC monitoring.
22
+
23
+ ## Configuration
24
+
25
+ This system is designed to be triggered by a Cloudflare Worker request.
26
+
27
+ ### Environment Variables (Secrets)
28
+ Set these in your Space settings:
29
+ - `GSC_CREDENTIALS_JSON`: Content of your Google Search Console JSON key.
30
+ - `GA4_CREDENTIALS_JSON`: Content of your GA4 JSON key.
31
+ - `INDEXNOW_KEY`: Your IndexNow API key.
32
+
33
+ ## Deployment
34
+
35
+ This Space runs automatically on Hugging Face infrastructure using the Docker SDK.
agents/backlink_indexing.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.oauth2 import service_account
2
+ from googleapiclient.discovery import build
3
+ import requests
4
+ import logging
5
+ import os
6
+ import xml.etree.ElementTree as ET
7
+ from datetime import datetime, timedelta
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class BacklinkIndexingAgent:
    """Submits URLs to search engines via IndexNow and Google Search Console.

    The GSC client is optional: if the service-account key file is missing or
    invalid, ``self.gsc`` stays ``None`` and Google-related methods become
    no-ops that return empty/False results.
    """

    def __init__(self, gsc_credentials_path, site_url):
        self.site_url = site_url
        self.gsc = None

        if os.path.exists(gsc_credentials_path):
            try:
                credentials = service_account.Credentials.from_service_account_file(
                    gsc_credentials_path,
                    scopes=['https://www.googleapis.com/auth/webmasters']
                )
                self.gsc = build('searchconsole', 'v1', credentials=credentials)
            except Exception as e:
                logger.error(f"Failed to initialize GSC client: {e}")
        else:
            logger.warning(f"GSC credentials not found at {gsc_credentials_path}")

    def submit_to_indexnow(self, urls, api_key=None, key_location=""):
        """Submit a batch of URLs via the IndexNow protocol.

        ``api_key`` falls back to the ``INDEXNOW_KEY`` environment variable
        (the secret documented in the README), then to the old placeholder,
        so existing callers keep working. Returns True on HTTP 200.
        """
        try:
            # Explicit argument wins; otherwise use the configured secret.
            key = api_key or os.environ.get('INDEXNOW_KEY', 'your-key')
            endpoint = "https://api.indexnow.org/IndexNow"
            host = self.site_url.replace('https://', '').replace('http://', '')
            payload = {
                "host": host,
                "key": key,
                "keyLocation": key_location or f"{self.site_url}/indexnow-key.txt",
                "urlList": urls
            }
            response = requests.post(endpoint, json=payload, timeout=10)
            return response.status_code == 200
        except Exception as e:
            logger.error(f"IndexNow submission failed: {e}")
            return False

    def submit_to_google(self, url):
        """Run a GSC URL Inspection for ``url``; returns True on success.

        Bug fix: the previous call passed ``siteUrl`` as a method kwarg and an
        Indexing-API-shaped body (``{'url', 'type'}``), which the
        searchconsole v1 client rejects; ``urlInspection.index.inspect`` takes
        ``inspectionUrl``/``siteUrl`` in the request body.

        NOTE(review): URL Inspection only *inspects* — it does not request
        (re)indexing. True index requests need the separate Indexing API
        (``urlNotifications.publish``); confirm which behavior is wanted.
        """
        if not self.gsc:
            return False
        try:
            body = {'inspectionUrl': url, 'siteUrl': self.site_url}
            self.gsc.urlInspection().index().inspect(body=body).execute()
            return True
        except Exception as e:
            logger.error(f"GSC submission failed for {url}: {e}")
            return False

    def auto_submit_new_pages(self, new_urls):
        """Submit ``new_urls`` to IndexNow (one batch) and Google (per URL).

        Returns ``{'indexnow': bool, 'google': [{'url', 'success'}, ...]}``.
        """
        results = {'indexnow': False, 'google': []}

        # IndexNow accepts the whole batch in a single request.
        if new_urls:
            results['indexnow'] = self.submit_to_indexnow(new_urls)

        # Google inspection quota is limited (~200/day); cap per run.
        for url in new_urls[:10]:
            full_url = url if url.startswith('http') else f"{self.site_url}{url}"
            success = self.submit_to_google(full_url)
            results['google'].append({'url': url, 'success': success})

        return results

    def check_indexing_status(self):
        """Placeholder: a real implementation would query searchAnalytics or
        urlInspection for coverage state."""
        if not self.gsc:
            return []
        return []

    def get_indexing_errors(self):
        """Placeholder: indexing-error reporting is not implemented yet."""
        return []
agents/competitor_intelligence.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from playwright.sync_api import sync_playwright
2
+ from sentence_transformers import SentenceTransformer, util
3
+ import time
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class CompetitorIntelligenceAgent:
    """Scrapes Google SERPs with Playwright and flags semantic content gaps."""

    def __init__(self):
        # Always define the attribute, even on load failure, so callers can
        # test for None (the original left it undefined and needed hasattr).
        self.semantic_model = None
        try:
            self.semantic_model = SentenceTransformer('all-mpnet-base-v2')
        except Exception as e:
            logger.error(f"Failed to load semantic model: {e}")

    def scrape_serp(self, keyword):
        """Return up to 5 competitor URLs from the Google SERP for ``keyword``.

        NOTE(review): frequent scraping may get blocked, and the CSS selectors
        below need maintenance whenever Google changes its markup.
        """
        results = []
        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                page = browser.new_page()

                page.goto(f"https://www.google.com/search?q={keyword}&num=10")
                time.sleep(2)  # be polite / let results render

                links = page.locator('div.g a').all()
                for link in links:
                    url = link.get_attribute('href')
                    if url and 'google' not in url:
                        results.append(url)
                        if len(results) >= 5:
                            break

                browser.close()
        except Exception as e:
            logger.error(f"SERP scraping failed: {e}")

        return results

    def analyze_content_gaps(self, your_content, competitor_contents):
        """Return gap records for competitors whose content diverges from ours.

        A "gap" is cosine similarity < 0.6 between embeddings. Returns [] if
        the model failed to load or there is nothing to compare (new guard).
        """
        if self.semantic_model is None or not competitor_contents:
            return []

        your_emb = self.semantic_model.encode(your_content, convert_to_tensor=True)
        comp_embs = self.semantic_model.encode(competitor_contents, convert_to_tensor=True)

        similarities = util.cos_sim(your_emb, comp_embs)

        gaps = []
        for idx, sim in enumerate(similarities[0]):
            if sim < 0.6:
                gaps.append({
                    "competitor_idx": idx,
                    "similarity": float(sim),
                    "note": "Topic gap detected"
                })
        return gaps

    def generate_competitive_report(self, keyword):
        """Scrape the SERP for ``keyword`` and return a basic report dict."""
        logger.info(f"Generating report for keyword: {keyword}")

        competitor_urls = self.scrape_serp(keyword)

        # A full implementation would fetch each competitor URL and feed the
        # text to analyze_content_gaps; skipped here (rate-limit sensitive).
        return {
            "keyword": keyword,
            "top_competitors": competitor_urls,
            "analysis_status": "completed_basic",
            "message": "Deep content analysis requires full scraping infrastructure which is rate-limit sensitive."
        }
agents/content_optimizer.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline, BartForConditionalGeneration, BartTokenizer, T5ForConditionalGeneration, T5Tokenizer
2
+ from sentence_transformers import SentenceTransformer, util
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
class ContentOptimizationAgent:
    """Generates SEO suggestions (keywords, meta description, title) with HF models."""

    def __init__(self):
        try:
            self.keyword_extractor = pipeline(
                "token-classification",
                model="ml6team/keyphrase-extraction-distilbert-inspec",
                aggregation_strategy="simple"
            )
            self.meta_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
            self.meta_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
            self.semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

            # Loaded eagerly assuming sufficient RAM (Space has 16GB).
            # NOTE(review): flan-t5-large is not pre-downloaded in the
            # Dockerfile, so this pulls the weights on first start — confirm.
            self.title_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
            self.title_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
        except Exception as e:
            logger.error(f"Failed to load models for Content Optimizer: {e}")

    def extract_keywords(self, text):
        """Return unique keyphrases (score > 0.7) from the first 512 chars of ``text``."""
        if not hasattr(self, 'keyword_extractor'):
            return []
        results = self.keyword_extractor(text[:512])  # limit for speed
        keywords = [r['word'] for r in results if r['score'] > 0.7]
        return list(set(keywords))

    def generate_meta_description(self, text, max_length=160):
        """Summarize ``text`` into a meta description with BART.

        NOTE(review): ``max_length`` is in *tokens*, not characters, so the
        output can exceed 160 characters — trim downstream if a hard limit
        is required.
        """
        if not hasattr(self, 'meta_model'):
            return ""
        inputs = self.meta_tokenizer(text[:1024], return_tensors="pt", truncation=True)
        summary_ids = self.meta_model.generate(
            inputs["input_ids"],
            max_length=max_length,
            min_length=100,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
        # Bug fix: generate() returns a 2-D batch tensor; decode the first
        # (only) sequence instead of passing the whole batch to decode().
        return self.meta_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    def optimize_title(self, content, keyword):
        """Generate an SEO page title (~60 chars target) for ``content`` about ``keyword``."""
        if not hasattr(self, 'title_model'):
            return ""
        prompt = f"Write an SEO-optimized page title under 60 characters for content about '{keyword}': {content[:500]}"
        inputs = self.title_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        outputs = self.title_model.generate(inputs["input_ids"], max_length=60, num_beams=5)
        # Bug fix: decode the first sequence of the returned batch.
        return self.title_tokenizer.decode(outputs[0], skip_special_tokens=True)

    def analyze_page(self, url):
        """Fetch ``url`` and return keyword/title/meta suggestions.

        Returns ``{'error': ...}`` if the fetch or any model step fails.
        """
        logger.info(f"Optimizing content for: {url}")
        try:
            response = requests.get(url, timeout=10)
            soup = BeautifulSoup(response.text, 'html.parser')
            content = soup.get_text(separator=' ', strip=True)

            keywords = self.extract_keywords(content)
            suggested_meta = self.generate_meta_description(content)

            # Simple main-keyword heuristic: first extracted phrase.
            main_keyword = keywords[0] if keywords else "general"
            suggested_title = self.optimize_title(content, main_keyword)

            recommendations = []
            if len(keywords) < 3:
                recommendations.append("Content might be too thin; few keywords detected.")

            return {
                "url": url,
                "extracted_keywords": keywords,
                "suggested_title": suggested_title,
                "suggested_meta_description": suggested_meta,
                "recommendations": recommendations
            }
        except Exception as e:
            logger.error(f"Content optimization failed: {e}")
            return {"error": str(e)}
agents/orchestrator.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import redis
2
+ import json
3
+ import logging
4
+ from datetime import datetime
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class OrchestratorAgent:
    """Creates and tracks multi-agent workflows, with optional Redis state."""

    # Workflow records expire after one day.
    WORKFLOW_TTL_SECONDS = 86400

    def __init__(self, redis_host='localhost', redis_port=6379, agents=None):
        self.agents = agents or {}
        try:
            self.redis = redis.Redis(host=redis_host, port=redis_port, decode_responses=True)
            # Ping now so an unreachable server leaves self.redis = None
            # instead of failing later on first use.
            self.redis.ping()
        except Exception as e:
            logger.warning(f"Redis connection failed (Orchestrator): {e}")
            self.redis = None

    def create_workflow(self, workflow_name, payload):
        """Register a workflow and return its id.

        In a real system this would decompose the workflow into steps and
        enqueue them; the prototype just records state in Redis (if connected).
        """
        workflow_id = f"workflow_{workflow_name}_{datetime.now().timestamp()}"
        logger.info(f"Creating workflow {workflow_id}")

        if self.redis:
            try:
                # Single SET with ex= is atomic; the previous set()+expire()
                # pair could leave a key without a TTL if the second call failed.
                self.redis.set(
                    workflow_id,
                    json.dumps({
                        "name": workflow_name,
                        "payload": payload,
                        "status": "created",
                        "created_at": datetime.now().isoformat()
                    }),
                    ex=self.WORKFLOW_TTL_SECONDS
                )
            except Exception as e:
                logger.error(f"Failed to save workflow state: {e}")

        return workflow_id

    def monitor_agent_health(self):
        """Report orchestrator liveness and Redis connectivity."""
        return {
            "orchestrator": "healthy",
            "redis": "connected" if self.redis else "disconnected"
        }
agents/performance_analytics.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.analytics.data_v1beta import BetaAnalyticsDataClient
2
+ from google.analytics.data_v1beta.types import RunReportRequest, DateRange, Metric, Dimension
3
+ from google.oauth2 import service_account
4
+ from googleapiclient.discovery import build
5
+ import pandas as pd
6
+ from datetime import datetime, timedelta
7
+ import requests
8
+ import logging
9
+ import os
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class PerformanceAnalyticsAgent:
    """Pulls SEO performance data from GA4, Search Console and PageSpeed Insights.

    Both Google clients are optional: if a credentials file is missing or
    invalid, the corresponding client stays ``None`` and the related methods
    degrade to empty results instead of raising.
    """

    def __init__(self, ga4_property_id, ga4_credentials_path, gsc_credentials_path, site_url):
        self.property_id = ga4_property_id
        self.site_url = site_url
        self.ga4_client = None
        self.gsc = None

        if os.path.exists(ga4_credentials_path):
            try:
                creds = service_account.Credentials.from_service_account_file(ga4_credentials_path)
                self.ga4_client = BetaAnalyticsDataClient(credentials=creds)
            except Exception as e:
                logger.error(f"GA4 init failed: {e}")

        if os.path.exists(gsc_credentials_path):
            try:
                creds = service_account.Credentials.from_service_account_file(gsc_credentials_path)
                self.gsc = build('searchconsole', 'v1', credentials=creds)
            except Exception as e:
                logger.error(f"GSC init failed: {e}")

    def get_underperforming_pages(self, days=30):
        """Return pages with >100 impressions but CTR < 2% over the last ``days`` days."""
        if not self.gsc:
            return []
        try:
            response = self.gsc.searchanalytics().query(
                siteUrl=self.site_url,
                body={
                    'startDate': (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d'),
                    'endDate': datetime.now().strftime('%Y-%m-%d'),
                    'dimensions': ['page'],
                    'rowLimit': 100
                }
            ).execute()

            underperforming = []
            for row in response.get('rows', []):
                ctr = row['ctr']
                # High visibility + low click-through suggests a title/meta problem.
                if row['impressions'] > 100 and ctr < 0.02:
                    underperforming.append({
                        'url': row['keys'][0],
                        'impressions': row['impressions'],
                        'ctr': ctr
                    })
            return underperforming
        except Exception as e:
            logger.error(f"GSC query failed: {e}")
            return []

    def detect_algorithm_update(self):
        """Simplified volatility check.

        In production this would compare day-over-day ranking distributions;
        for now it always reports no volatility.
        """
        return {
            'volatility_detected': False,
            'avg_position_change': 0.0
        }

    def generate_weekly_report(self):
        """Aggregate last-7-day GA4 pageviews into a simple report dict."""
        report = {
            'summary': {'total_pageviews': 0},
            'top_pages': [],
            'underperforming_pages': []
        }

        if self.ga4_client:
            try:
                request = RunReportRequest(
                    property=f"properties/{self.property_id}",
                    date_ranges=[DateRange(start_date="7daysAgo", end_date="today")],
                    dimensions=[Dimension(name="pagePath")],
                    metrics=[Metric(name="screenPageViews")]
                )
                response = self.ga4_client.run_report(request)

                total_pv = 0
                for row in response.rows:
                    pv = int(row.metric_values[0].value)
                    total_pv += pv
                    report['top_pages'].append({
                        'path': row.dimension_values[0].value,
                        'views': pv
                    })
                report['summary']['total_pageviews'] = total_pv
            except Exception as e:
                logger.error(f"GA4 report failed: {e}")

        return report

    def monitor_core_web_vitals(self, url):
        """Fetch the mobile Lighthouse performance score for ``url`` via the
        public PageSpeed Insights API. Returns ``{'error': ...}`` on failure."""
        try:
            psi_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
            # A list value produces repeated ?category= params, as PSI expects.
            params = {'url': url, 'strategy': 'mobile', 'category': ['performance', 'seo']}
            # PSI can take tens of seconds; bound it so callers never hang
            # (the original request had no timeout).
            resp = requests.get(psi_url, params=params, timeout=60)
            data = resp.json()

            lighthouse = data.get('lighthouseResult', {})
            return {
                'url': url,
                'performance_score': lighthouse.get('categories', {}).get('performance', {}).get('score')
            }
        except Exception as e:
            logger.error(f"CWV check failed: {e}")
            return {'error': str(e)}
agents/technical_auditor.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class TechnicalAuditorAgent:
    """Audits a page for content quality, missing alt text and broken links."""

    def __init__(self):
        try:
            self.classifier = pipeline(
                "zero-shot-classification",
                model="facebook/bart-large-mnli",
                device=-1  # CPU inference
            )
        except Exception as e:
            logger.warning(f"Could not load ML models for Technical Auditor: {e}")
            self.classifier = None

    def assess_content_quality(self, page_text):
        """Zero-shot classify ``page_text``; returns candidate labels ranked
        by score, or a sentinel list when the model never loaded."""
        if not self.classifier:
            return ["error-model-not-loaded"]

        result = self.classifier(
            page_text[:1024],  # limit text length for speed
            candidate_labels=["high-quality", "thin-content", "spam", "keyword-stuffing"]
        )
        return result['labels']

    def audit_page(self, url):
        """Fetch ``url`` and return an audit dict: quality labels, images
        missing alt text, and broken outbound links."""
        logger.info(f"Auditing page: {url}")
        results = {
            "url": url,
            "broken_links": [],
            "missing_alt": [],
            "content_quality": "unknown"
        }

        try:
            # NOTE(review): some sites require a browser-like User-Agent;
            # add headers here if fetches start returning 403.
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                logger.error(f"Page returned status {response.status_code}")
                return results

            soup = BeautifulSoup(response.text, 'html.parser')

            # Content quality via zero-shot classification.
            text_content = soup.get_text(separator=' ', strip=True)
            results['content_quality'] = self.assess_content_quality(text_content)

            # Images without alt attributes hurt accessibility and image SEO.
            for img in soup.find_all('img'):
                if not img.get('alt'):
                    results['missing_alt'].append(img.get('src'))

            # Broken-link check: one HEAD request per absolute link (slow on
            # link-heavy pages). Follow redirects so a 301 to a live page is
            # not mis-scored; note some servers answer HEAD with 405.
            for link in soup.find_all('a'):
                href = link.get('href')
                if href and href.startswith('http'):
                    try:
                        head = requests.head(href, timeout=5, allow_redirects=True)
                        if head.status_code >= 400:
                            results['broken_links'].append(href)
                    except requests.RequestException:
                        # Narrowed from a bare except: only network failures
                        # mark a link broken, not KeyboardInterrupt/SystemExit.
                        results['broken_links'].append(href)

        except Exception as e:
            logger.error(f"Audit failed for {url}: {e}")

        logger.info(f"Audit complete for {url}: {results}")
        # In a real system, post these results to a webhook or database.
        return results
main.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""FastAPI entry point for the SEO Multi-Agent System.

Exposes one endpoint group per agent; heavy or slow work is dispatched via
BackgroundTasks so the HTTP caller (e.g. a Cloudflare Worker) returns fast.
"""
from fastapi import FastAPI, BackgroundTasks, HTTPException, Request
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
import logging
import os
import json

# Import agents
from agents.technical_auditor import TechnicalAuditorAgent
from agents.content_optimizer import ContentOptimizationAgent
from agents.competitor_intelligence import CompetitorIntelligenceAgent
from agents.backlink_indexing import BacklinkIndexingAgent
from agents.performance_analytics import PerformanceAnalyticsAgent
from agents.orchestrator import OrchestratorAgent

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="SEO Multi-Agent System", version="1.0.0")

# Deployment configuration — overridable via environment variables (Space
# secrets/settings) so no code edit is needed per site. Defaults preserve
# the original placeholder values.
SITE_URL = os.environ.get("SITE_URL", "https://yoursite.pages.dev")
GA4_PROPERTY_ID = os.environ.get("GA4_PROPERTY_ID", "your-property-id")
GSC_CREDENTIALS_PATH = os.environ.get("GSC_CREDENTIALS_PATH", "/app/credentials/gsc-credentials.json")
GA4_CREDENTIALS_PATH = os.environ.get("GA4_CREDENTIALS_PATH", "/app/credentials/ga4-credentials.json")
REDIS_HOST = os.environ.get("REDIS_HOST", "redis")  # service name in docker-compose or similar
REDIS_PORT = int(os.environ.get("REDIS_PORT", "6379"))

# Initialize agents once at import time — model loading is expensive, so the
# instances are shared across all requests.
technical_agent = TechnicalAuditorAgent()
content_agent = ContentOptimizationAgent()
competitor_agent = CompetitorIntelligenceAgent()
indexing_agent = BacklinkIndexingAgent(
    gsc_credentials_path=GSC_CREDENTIALS_PATH,
    site_url=SITE_URL,
)
performance_agent = PerformanceAnalyticsAgent(
    ga4_property_id=GA4_PROPERTY_ID,
    ga4_credentials_path=GA4_CREDENTIALS_PATH,
    gsc_credentials_path=GSC_CREDENTIALS_PATH,
    site_url=SITE_URL,
)
orchestrator = OrchestratorAgent(redis_host=REDIS_HOST, redis_port=REDIS_PORT, agents={})

# --- MODELS ---

class PageAuditRequest(BaseModel):
    """Request body for /audit/execute."""
    url: str
    checks: Optional[List[str]] = ["all"]

class ContentOptimizeRequest(BaseModel):
    """Request body for /optimize."""
    url: str
    tasks: Optional[List[str]] = ["all"]

class CompetitorAnalysisRequest(BaseModel):
    """Request body for /competitor/analyze."""
    keyword: str

class IndexingRequest(BaseModel):
    """Request body for /index/submit."""
    urls: List[str]

class WorkflowTrigger(BaseModel):
    """Request body for /workflow/trigger."""
    workflow_name: str
    payload: Dict[str, Any]

# --- ENDPOINTS ---

@app.get("/")
async def root():
    """Service banner listing the available agents."""
    return {
        "service": "SEO Multi-Agent System",
        "status": "running",
        "agents": ["technical", "content", "competitor", "indexing", "performance"]
    }

@app.get("/health")
async def health_check():
    """Basic liveness check; can be expanded to probe individual agents."""
    return {"status": "healthy", "orchestrator": "online"}

# TECHNICAL AUDITOR
@app.post("/audit/execute")
async def execute_audit(request: PageAuditRequest, background_tasks: BackgroundTasks):
    """Start a page audit in the background; returns immediately."""
    background_tasks.add_task(technical_agent.audit_page, request.url)
    return {"status": "started", "url": request.url}

# CONTENT OPTIMIZER
@app.post("/optimize")
async def optimize_content(request: ContentOptimizeRequest):
    """Run content optimization synchronously and return the suggestions."""
    result = content_agent.analyze_page(request.url)
    return result

# COMPETITOR INTELLIGENCE
@app.post("/competitor/analyze")
async def analyze_competitor(request: CompetitorAnalysisRequest, background_tasks: BackgroundTasks):
    """Start a competitor SERP analysis in the background."""
    background_tasks.add_task(competitor_agent.generate_competitive_report, request.keyword)
    return {"status": "started", "keyword": request.keyword}

# INDEXING
@app.post("/index/submit")
async def submit_indexing(request: IndexingRequest):
    """Submit the given URLs to IndexNow and Google synchronously."""
    results = indexing_agent.auto_submit_new_pages(request.urls)
    return results

@app.get("/index/status")
async def indexing_status():
    """Report indexed pages and indexing errors (placeholders for now)."""
    status = indexing_agent.check_indexing_status()
    errors = indexing_agent.get_indexing_errors()
    return {"indexed_pages": status, "errors": errors}

# PERFORMANCE & ANALYTICS
@app.get("/analytics/underperforming")
async def get_underperforming():
    """List pages with high impressions but low CTR over the last 30 days."""
    try:
        pages = performance_agent.get_underperforming_pages(days=30)
        return pages
    except Exception as e:
        logger.error(f"Error getting underperforming pages: {e}")
        return []

@app.get("/report/weekly")
async def weekly_report():
    """Return the aggregated GA4 weekly report."""
    try:
        report = performance_agent.generate_weekly_report()
        return report
    except Exception as e:
        logger.error(f"Error generating weekly report: {e}")
        return {"error": str(e)}

@app.get("/monitor/algorithm")
async def check_algorithm():
    """Return the (simplified) algorithm-update volatility check."""
    try:
        status = performance_agent.detect_algorithm_update()
        return status
    except Exception as e:
        return {"volatility_detected": False, "error": str(e)}

@app.get("/monitor/cwv")
async def check_cwv(url: str):
    """Return Core Web Vitals / Lighthouse performance score for ``url``."""
    result = performance_agent.monitor_core_web_vitals(url)
    return result

# ORCHESTRATOR
@app.post("/workflow/trigger")
async def trigger_workflow(request: WorkflowTrigger):
    """Create a workflow via the orchestrator and return its id."""
    workflow_id = orchestrator.create_workflow(
        request.workflow_name,
        request.payload
    )
    return {"workflow_id": workflow_id, "status": "started"}
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.109.0
2
+ uvicorn[standard]==0.27.0
3
+ transformers==4.36.0
4
+ torch==2.1.0
5
+ sentence-transformers==2.3.1
6
+ beautifulsoup4==4.12.3
7
+ scrapy==2.11.0
8
+ playwright==1.41.0
9
+ pandas==2.2.0
10
+ numpy==1.26.3
11
+ scikit-learn==1.4.0
12
+ redis==5.0.1
13
+ psycopg2-binary==2.9.9
14
+ google-api-python-client==2.116.0
15
+ google-auth==2.27.0
16
+ google-analytics-data==0.18.5
17
+ apscheduler==3.10.4
18
+ requests==2.31.0
19
+ pillow==10.2.0
20
+ lxml==5.1.0
21
+ keybert==0.8.4