|
|
import logging
import time
from urllib.parse import quote_plus

from playwright.sync_api import sync_playwright
from sentence_transformers import SentenceTransformer, util
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
class CompetitorIntelligenceAgent:
    """Scrape Google SERPs for a keyword and flag semantic content gaps
    against competitor pages using sentence-transformer embeddings.

    Degrades gracefully: if the embedding model fails to load, gap
    analysis returns an empty list instead of raising.
    """

    def __init__(self):
        """Load the sentence-transformer model used for gap analysis.

        On failure, logs the error and sets ``self.semantic_model`` to
        ``None`` (fix: previously the attribute was left unset, forcing
        callers to probe with ``hasattr``).
        """
        try:
            self.semantic_model = SentenceTransformer('all-mpnet-base-v2')
        except Exception as e:
            logger.error(f"Failed to load semantic model: {e}")
            self.semantic_model = None

    def scrape_serp(self, keyword, max_results=5):
        """Return up to ``max_results`` non-Google result URLs for ``keyword``.

        Args:
            keyword: Raw search query; URL-encoded internally (fix: the
                original interpolated it verbatim, so spaces/&/# broke the
                query string).
            max_results: Cap on returned URLs (default 5, as before).

        Returns:
            list[str]: Result URLs, possibly empty; empty on any failure
            (errors are logged, not raised).
        """
        results = []
        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                try:
                    page = browser.new_page()
                    # Fix: quote_plus so special characters don't corrupt the URL.
                    page.goto(f"https://www.google.com/search?q={quote_plus(keyword)}&num=10")
                    time.sleep(2)  # crude wait for results to render; TODO: wait_for_selector

                    for link in page.locator('div.g a').all():
                        url = link.get_attribute('href')
                        # Skip intra-Google links (caches, translate, etc.).
                        if url and 'google' not in url:
                            results.append(url)
                            if len(results) >= max_results:
                                break
                finally:
                    # Fix: close the browser even when scraping raises mid-loop.
                    browser.close()
        except Exception as e:
            logger.error(f"SERP scraping failed: {e}")

        return results

    def analyze_content_gaps(self, your_content, competitor_contents, threshold=0.6):
        """Flag competitors whose content differs markedly from yours.

        Args:
            your_content: Text of your own page.
            competitor_contents: List of competitor page texts.
            threshold: Cosine-similarity floor below which a competitor is
                flagged as a topic gap (default 0.6, as before).

        Returns:
            list[dict]: One ``{"competitor_idx", "similarity", "note"}``
            entry per low-similarity competitor; ``[]`` when the semantic
            model is unavailable.
        """
        # Works whether __init__ failed before setting the attribute (old
        # behavior) or set it to None (new behavior).
        if getattr(self, 'semantic_model', None) is None:
            return []

        your_emb = self.semantic_model.encode(your_content, convert_to_tensor=True)
        comp_embs = self.semantic_model.encode(competitor_contents, convert_to_tensor=True)

        # Row 0: similarity of your single embedding vs. each competitor.
        similarities = util.cos_sim(your_emb, comp_embs)

        gaps = []
        for idx, sim in enumerate(similarities[0]):
            if sim < threshold:
                gaps.append({
                    "competitor_idx": idx,
                    "similarity": float(sim),
                    "note": "Topic gap detected"
                })
        return gaps

    def generate_competitive_report(self, keyword):
        """Build a basic competitive report for ``keyword``.

        Currently only collects top competitor URLs; deep per-page content
        analysis is deferred (see ``message`` in the returned dict).

        Returns:
            dict: keyword, top competitor URLs, status, and an explanatory
            message.
        """
        logger.info(f"Generating report for keyword: {keyword}")

        competitor_urls = self.scrape_serp(keyword)

        return {
            "keyword": keyword,
            "top_competitors": competitor_urls,
            "analysis_status": "completed_basic",
            "message": "Deep content analysis requires full scraping infrastructure which is rate-limit sensitive."
        }
|
|
|