|
|
from transformers import pipeline, BartForConditionalGeneration, BartTokenizer, T5ForConditionalGeneration, T5Tokenizer |
|
|
from sentence_transformers import SentenceTransformer, util |
|
|
import requests |
|
|
from bs4 import BeautifulSoup |
|
|
import logging |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
class ContentOptimizationAgent:
    """SEO content-optimization agent.

    Wraps several pretrained models to analyze a web page: extracts
    keyphrases, generates a meta description (BART) and an SEO title
    (FLAN-T5). If model loading fails, the agent degrades gracefully —
    each public method checks for its model and returns an empty/neutral
    result instead of raising.
    """

    def __init__(self):
        """Load all models eagerly.

        On any load failure the error is logged and the remaining model
        attributes are left unset; the public methods guard on attribute
        presence rather than raising.
        """
        try:
            # Keyphrase extraction: DistilBERT fine-tuned on Inspec.
            self.keyword_extractor = pipeline(
                "token-classification",
                model="ml6team/keyphrase-extraction-distilbert-inspec",
                aggregation_strategy="simple"
            )
            # BART summarizer repurposed to draft meta descriptions.
            self.meta_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
            self.meta_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
            # Sentence embeddings (available for semantic-similarity scoring).
            self.semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
            # Instruction-tuned T5 used for title generation.
            self.title_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
            self.title_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
        except Exception as e:
            logger.error(f"Failed to load models for Content Optimizer: {e}")

    def extract_keywords(self, text):
        """Return unique keyphrases (confidence > 0.7) from the first 512 chars.

        Returns an empty list if the extractor is unavailable or inference fails.
        """
        if not hasattr(self, 'keyword_extractor'):
            return []
        try:
            # 512 chars is a cheap pre-truncation; the pipeline tokenizes further.
            results = self.keyword_extractor(text[:512])
        except Exception as e:
            # FIX: inference was previously unguarded, unlike the sibling
            # methods — a pipeline error would have propagated to callers.
            logger.error(f"Keyword extraction failed: {e}")
            return []
        return list({r['word'] for r in results if r['score'] > 0.7})

    def generate_meta_description(self, text, max_length=160):
        """Generate a meta description by summarizing `text` with BART.

        NOTE(review): max_length/min_length are measured in *tokens*, but
        SERP meta descriptions are capped in characters (~160) — the output
        may exceed 160 characters; confirm whether a character cap is intended.
        Returns "" if the model is unavailable, or a fallback string on error.
        """
        # FIX: also require the tokenizer — a partial __init__ failure could
        # leave meta_model set but meta_tokenizer missing.
        if not (hasattr(self, 'meta_model') and hasattr(self, 'meta_tokenizer')):
            return ""
        try:
            # FIX: tokenization and generate() were outside the try block,
            # so their failures escaped the handler this method clearly
            # intends to provide. Only decode() was protected before.
            inputs = self.meta_tokenizer(text[:1024], return_tensors="pt", truncation=True)
            summary_ids = self.meta_model.generate(
                inputs["input_ids"],
                max_length=max_length,
                min_length=100,
                length_penalty=2.0,
                num_beams=4,
                early_stopping=True
            )
            return self.meta_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        except Exception as e:
            logger.error(f"Meta gen failed: {e}")
            return "Could not generate meta description."

    def optimize_title(self, content, keyword):
        """Generate an SEO page title for `content` focused on `keyword`.

        Returns "" if the model is unavailable, or a fallback string on error.
        """
        if not (hasattr(self, 'title_model') and hasattr(self, 'title_tokenizer')):
            return ""
        try:
            prompt = f"Write an SEO-optimized page title under 60 characters for content about '{keyword}': {content[:500]}"
            inputs = self.title_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            outputs = self.title_model.generate(inputs["input_ids"], max_length=60, num_beams=5)
            return self.title_tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            logger.error(f"Title gen failed: {e}")
            return "Could not generate title."

    def analyze_page(self, url):
        """Fetch `url` and return an SEO analysis dict.

        Keys: url, extracted_keywords, suggested_title,
        suggested_meta_description, recommendations.
        On any failure returns {"error": <message>}.
        """
        logger.info(f"Optimizing content for: {url}")
        try:
            response = requests.get(url, timeout=10)
            # FIX: previously missing — without this, a 404/500 error page
            # would be silently analyzed as if it were real content.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            content = soup.get_text(separator=' ', strip=True)

            keywords = self.extract_keywords(content)
            suggested_meta = self.generate_meta_description(content)

            # Use the top extracted keyphrase to steer the title prompt.
            main_keyword = keywords[0] if keywords else "general"
            suggested_title = self.optimize_title(content, main_keyword)

            recommendations = []
            if len(keywords) < 3:
                recommendations.append("Content might be too thin; few keywords detected.")

            return {
                "url": url,
                "extracted_keywords": keywords,
                "suggested_title": suggested_title,
                "suggested_meta_description": suggested_meta,
                "recommendations": recommendations
            }
        except Exception as e:
            logger.error(f"Content optimization failed: {e}")
            return {"error": str(e)}
|
|
|