File size: 2,690 Bytes
e5ab217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import logging
import time
from urllib.parse import quote_plus

from playwright.sync_api import sync_playwright
from sentence_transformers import SentenceTransformer, util

# Module-level logger, per the standard `logging.getLogger(__name__)` convention.
logger = logging.getLogger(__name__)

class CompetitorIntelligenceAgent:
    """Collects top search results for a keyword and flags semantic content gaps.

    Uses Playwright to scrape a Google SERP and a SentenceTransformer model
    to compare your content against competitors'. If the embedding model
    fails to load, the agent degrades gracefully: gap analysis returns [].
    """

    # Maximum number of competitor URLs collected from a single SERP.
    MAX_RESULTS = 5

    def __init__(self):
        """Load the sentence-embedding model, degrading gracefully on failure.

        On load failure ``self.semantic_model`` is set to ``None`` (the
        original left the attribute entirely undefined, forcing a fragile
        ``hasattr`` check downstream).
        """
        self.semantic_model = None
        try:
            self.semantic_model = SentenceTransformer('all-mpnet-base-v2')
        except Exception:
            # logger.exception preserves the traceback, unlike error(f"...{e}").
            logger.exception("Failed to load semantic model")

    def scrape_serp(self, keyword):
        """Return up to MAX_RESULTS organic result URLs for *keyword*.

        Args:
            keyword: Search query; URL-encoded before use, so spaces and
                special characters are safe.

        Returns:
            list[str]: Non-Google result URLs; empty on any scraping error.
        """
        results = []
        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                try:
                    page = browser.new_page()

                    # URL-encode the keyword — the original interpolated it
                    # raw, which breaks on spaces/&/# etc.
                    # (Note: frequent scraping might get blocked.)
                    page.goto(
                        f"https://www.google.com/search?q={quote_plus(keyword)}&num=10"
                    )
                    time.sleep(2)  # Be polite

                    # Extract results (selectors might need maintenance as
                    # Google changes its markup).
                    for link in page.locator('div.g a').all():
                        url = link.get_attribute('href')
                        if url and 'google' not in url:
                            results.append(url)
                            if len(results) >= self.MAX_RESULTS:
                                break
                finally:
                    # Always release the browser — the original leaked the
                    # process if navigation or extraction raised.
                    browser.close()
        except Exception:
            logger.exception("SERP scraping failed")

        return results

    def analyze_content_gaps(self, your_content, competitor_contents, threshold=0.6):
        """Flag competitors whose content is semantically distant from yours.

        Args:
            your_content: Text of your own page.
            competitor_contents: List of competitor page texts.
            threshold: Cosine-similarity cutoff below which a topic gap is
                reported (default 0.6, matching the original hard-coded value).

        Returns:
            list[dict]: One entry per low-similarity competitor, with keys
            ``competitor_idx``, ``similarity`` and ``note``. Empty when the
            model is unavailable or there are no competitor texts.
        """
        # Guard both failure modes: model never loaded, and an empty input
        # list (encode([]) would otherwise produce a shape error downstream).
        if self.semantic_model is None or not competitor_contents:
            return []

        your_emb = self.semantic_model.encode(your_content, convert_to_tensor=True)
        comp_embs = self.semantic_model.encode(competitor_contents, convert_to_tensor=True)

        # 1 x N matrix of cosine similarities; row 0 is "your content vs all".
        similarities = util.cos_sim(your_emb, comp_embs)

        return [
            {
                "competitor_idx": idx,
                "similarity": float(sim),
                "note": "Topic gap detected",
            }
            for idx, sim in enumerate(similarities[0])
            if sim < threshold
        ]

    def generate_competitive_report(self, keyword):
        """Produce a basic competitive report (URLs only) for *keyword*.

        Deep per-URL content analysis is intentionally not performed here;
        see the ``message`` field in the returned dict.

        Returns:
            dict: keyword, top competitor URLs, and a status/message pair.
        """
        logger.info("Generating report for keyword: %s", keyword)

        competitor_urls = self.scrape_serp(keyword)

        # In a real run, we would visit each URL to get content.
        # For this skeleton, we'll placeholder.

        return {
            "keyword": keyword,
            "top_competitors": competitor_urls,
            "analysis_status": "completed_basic",
            "message": "Deep content analysis requires full scraping infrastructure which is rate-limit sensitive.",
        }