from urllib.parse import quote_plus

from playwright.sync_api import sync_playwright
from sentence_transformers import SentenceTransformer, util
import time
import logging

logger = logging.getLogger(__name__)


class CompetitorIntelligenceAgent:
    """Scrapes search-engine results for a keyword and compares competitor
    content against your own using sentence-embedding similarity."""

    def __init__(self):
        """Load the sentence-embedding model.

        On failure the error is logged and ``self.semantic_model`` is set to
        ``None`` so later calls can degrade gracefully instead of raising
        ``AttributeError``.
        """
        self.semantic_model = None
        try:
            self.semantic_model = SentenceTransformer('all-mpnet-base-v2')
        except Exception as e:
            logger.error(f"Failed to load semantic model: {e}")

    def scrape_serp(self, keyword):
        """Return up to 5 non-Google result URLs from a Google search.

        Args:
            keyword: Raw search query; it is URL-encoded before use.

        Returns:
            list[str]: Result URLs (possibly empty if scraping fails or
            Google's markup changes — the ``div.g a`` selector needs
            maintenance).
        """
        results = []
        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                try:
                    page = browser.new_page()
                    # Encode the keyword so spaces/&/# don't corrupt the query.
                    # Note: frequent scraping might get blocked by Google.
                    page.goto(f"https://www.google.com/search?q={quote_plus(keyword)}&num=10")
                    time.sleep(2)  # Be polite / let results render
                    # Extract result links (selectors might need maintenance).
                    links = page.locator('div.g a').all()
                    for link in links:
                        url = link.get_attribute('href')
                        # Skip Google's own navigation/cache links.
                        if url and 'google' not in url:
                            results.append(url)
                        if len(results) >= 5:
                            break
                finally:
                    # Ensure the browser is released even if extraction raises.
                    browser.close()
        except Exception as e:
            logger.error(f"SERP scraping failed: {e}")
        return results

    def analyze_content_gaps(self, your_content, competitor_contents):
        """Flag competitors whose content diverges from yours.

        Args:
            your_content: Text of your own page.
            competitor_contents: List of competitor page texts.

        Returns:
            list[dict]: One entry per competitor whose cosine similarity to
            your content is below 0.6, with ``competitor_idx``,
            ``similarity`` and ``note`` keys. Empty if the embedding model
            failed to load.
        """
        if self.semantic_model is None:
            return []
        your_emb = self.semantic_model.encode(your_content, convert_to_tensor=True)
        comp_embs = self.semantic_model.encode(competitor_contents, convert_to_tensor=True)
        # Row 0 holds similarities of your_content vs. each competitor.
        similarities = util.cos_sim(your_emb, comp_embs)
        gaps = []
        for idx, sim in enumerate(similarities[0]):
            if sim < 0.6:  # below-threshold similarity => likely topic gap
                gaps.append({
                    "competitor_idx": idx,
                    "similarity": float(sim),
                    "note": "Topic gap detected"
                })
        return gaps

    def generate_competitive_report(self, keyword):
        """Produce a basic competitive report for ``keyword``.

        Only collects top competitor URLs; deep per-URL content analysis is
        intentionally left out (rate-limit sensitive — see ``message``).

        Returns:
            dict: keyword, top competitor URLs, and a status/message pair.
        """
        logger.info(f"Generating report for keyword: {keyword}")
        competitor_urls = self.scrape_serp(keyword)
        # Deep analysis would visit each URL and feed the text through
        # analyze_content_gaps; omitted in this skeleton.
        return {
            "keyword": keyword,
            "top_competitors": competitor_urls,
            "analysis_status": "completed_basic",
            "message": "Deep content analysis requires full scraping infrastructure which is rate-limit sensitive."
        }