File size: 4,090 Bytes
e5ab217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454512f
 
 
 
 
e5ab217
 
 
454512f
 
 
 
 
 
 
 
e5ab217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from transformers import pipeline, BartForConditionalGeneration, BartTokenizer, T5ForConditionalGeneration, T5Tokenizer
from sentence_transformers import SentenceTransformer, util
import requests
from bs4 import BeautifulSoup
import logging

logger = logging.getLogger(__name__)

class ContentOptimizationAgent:
    """SEO content-optimization agent.

    Wraps several Hugging Face models to extract keyphrases, generate meta
    descriptions and page titles, and audit a live URL. Models are loaded
    eagerly in ``__init__``; if loading fails, the attributes are left unset
    and every public method degrades gracefully (empty result) instead of
    raising — each one guards with ``hasattr`` before using a model.
    """

    def __init__(self):
        try:
            # Keyphrase extraction pipeline (DistilBERT fine-tuned on Inspec).
            self.keyword_extractor = pipeline(
                "token-classification",
                model="ml6team/keyphrase-extraction-distilbert-inspec",
                aggregation_strategy="simple"
            )
            # Abstractive summarizer used to draft meta descriptions.
            self.meta_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
            self.meta_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
            # Sentence embeddings (loaded for semantic similarity; not used in this file's methods).
            self.semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

            # Load T5 only if needed to save memory, or here if sufficient RAM (Space has 16GB)
            self.title_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
            self.title_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
        except Exception as e:
            # Leave attributes unset; the hasattr guards below turn every
            # method into a no-op rather than crashing the whole agent.
            logger.error(f"Failed to load models for Content Optimizer: {e}")

    def extract_keywords(self, text):
        """Return unique keyphrases (confidence > 0.7) from the first 512 chars of *text*.

        Returns an empty list when models are unavailable or extraction fails,
        consistent with the other methods' degrade-gracefully behavior.
        """
        if not hasattr(self, 'keyword_extractor'):
            return []
        try:
            results = self.keyword_extractor(text[:512])  # Limit for speed
        except Exception as e:
            logger.error(f"Keyword extraction failed: {e}")
            return []
        # Set comprehension de-duplicates in one pass.
        return list({r['word'] for r in results if r['score'] > 0.7})

    def generate_meta_description(self, text, max_length=160):
        """Generate a meta description for *text* via BART summarization.

        NOTE(review): ``max_length``/``min_length`` are *token* counts for
        ``generate()``, not characters — the 160 default suggests the 160-char
        SEO limit was intended, so output may run longer; confirm desired cap.
        """
        if not hasattr(self, 'meta_model'):
            return ""
        # The try must cover tokenization and generation too, not just decode:
        # those are the calls most likely to fail (OOM, bad input, etc.).
        try:
            inputs = self.meta_tokenizer(text[:1024], return_tensors="pt", truncation=True)
            summary_ids = self.meta_model.generate(
                inputs["input_ids"],
                max_length=max_length,
                # Clamp so a caller-supplied max_length < 100 can't make
                # min_length exceed max_length (which breaks generation).
                min_length=min(100, max_length),
                length_penalty=2.0,
                num_beams=4,
                early_stopping=True
            )
            return self.meta_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        except Exception as e:
            logger.error(f"Meta gen failed: {e}")
            return "Could not generate meta description."

    def optimize_title(self, content, keyword):
        """Generate an SEO page title for *content* targeting *keyword* with FLAN-T5."""
        if not hasattr(self, 'title_model'):
            return ""
        try:
            prompt = f"Write an SEO-optimized page title under 60 characters for content about '{keyword}': {content[:500]}"
            inputs = self.title_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            # max_length here is also tokens; the "60 characters" constraint is
            # enforced only via the prompt.
            outputs = self.title_model.generate(inputs["input_ids"], max_length=60, num_beams=5)
            return self.title_tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            logger.error(f"Title gen failed: {e}")
            return "Could not generate title."

    def analyze_page(self, url):
        """Fetch *url* and return an SEO analysis dict.

        On success returns keys: url, extracted_keywords, suggested_title,
        suggested_meta_description, recommendations. On any failure returns
        {"error": <message>}.
        """
        logger.info(f"Optimizing content for: {url}")
        try:
            response = requests.get(url, timeout=10)
            # Without this, a 404/500 error page would be silently analyzed
            # as if it were real content.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            content = soup.get_text(separator=' ', strip=True)

            keywords = self.extract_keywords(content)
            suggested_meta = self.generate_meta_description(content)

            # Detect main keyword (simple frequency for now, or use first extracted)
            main_keyword = keywords[0] if keywords else "general"
            suggested_title = self.optimize_title(content, main_keyword)

            recommendations = []
            if len(keywords) < 3:
                recommendations.append("Content might be too thin; few keywords detected.")

            return {
                "url": url,
                "extracted_keywords": keywords,
                "suggested_title": suggested_title,
                "suggested_meta_description": suggested_meta,
                "recommendations": recommendations
            }
        except Exception as e:
            # logger.exception records the traceback, not just the message.
            logger.exception(f"Content optimization failed: {e}")
            return {"error": str(e)}