# SEO/agents/competitor_intelligence.py
import logging
import time
import urllib.parse

from playwright.sync_api import sync_playwright
from sentence_transformers import SentenceTransformer, util
logger = logging.getLogger(__name__)
class CompetitorIntelligenceAgent:
    """Scrape Google SERPs for a keyword and flag topic gaps between your
    content and top-ranking competitor pages via sentence-transformer
    embeddings.

    If the semantic model fails to load (e.g. offline), the agent degrades
    gracefully: ``analyze_content_gaps`` returns an empty list and SERP
    scraping still works.
    """

    # Maximum organic result URLs kept per SERP scrape.
    MAX_RESULTS = 5
    # Cosine similarity below this marks a competitor page as covering
    # topics our content does not.
    SIMILARITY_THRESHOLD = 0.6

    def __init__(self):
        """Load the embedding model once at construction.

        ``semantic_model`` is always set (to ``None`` on failure) so later
        methods can test it directly instead of probing with ``hasattr``.
        """
        self.semantic_model = None
        try:
            self.semantic_model = SentenceTransformer('all-mpnet-base-v2')
        except Exception as e:
            # Model download/load can fail offline; degrade, don't crash.
            logger.error("Failed to load semantic model: %s", e)

    def scrape_serp(self, keyword):
        """Return up to ``MAX_RESULTS`` organic result URLs for *keyword*.

        Args:
            keyword: Raw search phrase; URL-encoded before the request.

        Returns:
            list[str]: Absolute http(s) URLs, empty on any scraping error.
            Note: frequent scraping might get blocked by Google.
        """
        results = []
        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                try:
                    page = browser.new_page()
                    # quote_plus: a raw keyword containing spaces, '&' or '#'
                    # would otherwise corrupt the query string.
                    query = urllib.parse.quote_plus(keyword)
                    page.goto(f"https://www.google.com/search?q={query}&num=10")
                    time.sleep(2)  # Be polite; also lets results render
                    # Extract results (selectors might need maintenance as
                    # Google changes its markup).
                    for link in page.locator('div.g a').all():
                        url = link.get_attribute('href')
                        # Require absolute http(s) links: relative hrefs such
                        # as '/url?q=...' would slip past the 'google' filter.
                        if url and url.startswith('http') and 'google' not in url:
                            results.append(url)
                        if len(results) >= self.MAX_RESULTS:
                            break
                finally:
                    # Close the browser even if extraction raises mid-loop.
                    browser.close()
        except Exception as e:
            logger.error("SERP scraping failed: %s", e)
        return results

    def analyze_content_gaps(self, your_content, competitor_contents):
        """Compare your content against competitor texts and report gaps.

        Args:
            your_content: Text of your own page.
            competitor_contents: List of competitor page texts.

        Returns:
            list[dict]: One entry per competitor whose cosine similarity to
            your content falls below ``SIMILARITY_THRESHOLD``; empty when the
            model is unavailable or there is nothing to compare.
        """
        # Guard both failure modes: missing/failed model, and an empty
        # competitor list (encoding [] would break cos_sim downstream).
        if getattr(self, 'semantic_model', None) is None or not competitor_contents:
            return []
        your_emb = self.semantic_model.encode(your_content, convert_to_tensor=True)
        comp_embs = self.semantic_model.encode(competitor_contents, convert_to_tensor=True)
        similarities = util.cos_sim(your_emb, comp_embs)
        gaps = []
        for idx, sim in enumerate(similarities[0]):
            if sim < self.SIMILARITY_THRESHOLD:
                gaps.append({
                    "competitor_idx": idx,
                    "similarity": float(sim),
                    "note": "Topic gap detected"
                })
        return gaps

    def generate_competitive_report(self, keyword):
        """Build a basic competitive report for *keyword*.

        Currently returns only the top competitor URLs; deep per-URL content
        analysis is deliberately out of scope (rate-limit sensitive).

        Returns:
            dict: keyword, competitor URLs and a status/message pair.
        """
        logger.info("Generating report for keyword: %s", keyword)
        competitor_urls = self.scrape_serp(keyword)
        # In a real run we would visit each URL, extract its content and feed
        # it to analyze_content_gaps; this skeleton stops at URL discovery.
        return {
            "keyword": keyword,
            "top_competitors": competitor_urls,
            "analysis_status": "completed_basic",
            "message": "Deep content analysis requires full scraping infrastructure which is rate-limit sensitive."
        }