# SEO/agents/content_optimizer.py
# (Hugging Face upload by pkm13, revision 454512f)
from transformers import pipeline, BartForConditionalGeneration, BartTokenizer, T5ForConditionalGeneration, T5Tokenizer
from sentence_transformers import SentenceTransformer, util
import requests
from bs4 import BeautifulSoup
import logging
# Module-level logger; all model-load and inference failures are reported here.
logger = logging.getLogger(__name__)
class ContentOptimizationAgent:
    """SEO content-optimization agent.

    Extracts keyphrases (DistilBERT keyphrase model), generates meta
    descriptions (BART) and SEO page titles (FLAN-T5) for a fetched web page.

    Model loading is best-effort: if it fails, the instance is still created
    and each method degrades gracefully (empty result) via hasattr() guards.
    """

    def __init__(self):
        try:
            self.keyword_extractor = pipeline(
                "token-classification",
                model="ml6team/keyphrase-extraction-distilbert-inspec",
                aggregation_strategy="simple"
            )
            self.meta_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
            self.meta_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
            # NOTE(review): semantic_model is loaded but never used in this
            # file — confirm a caller needs it before keeping the download.
            self.semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
            # Load T5 only if needed to save memory, or here if sufficient RAM (Space has 16GB)
            self.title_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
            self.title_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
        except Exception as e:
            logger.error(f"Failed to load models for Content Optimizer: {e}")

    def extract_keywords(self, text):
        """Return unique keyphrases (confidence > 0.7) from the first 512 chars of *text*.

        Returns [] when the extractor failed to load.
        """
        if not hasattr(self, 'keyword_extractor'):
            return []
        results = self.keyword_extractor(text[:512])  # Limit input for speed
        keywords = [r['word'] for r in results if r['score'] > 0.7]
        return list(set(keywords))

    def generate_meta_description(self, text, max_length=160):
        """Summarize *text* into a meta description of at most *max_length* CHARACTERS.

        Fixes two defects in the earlier version: generate()'s max_length /
        min_length count TOKENS, so passing 160/100 straight through produced
        descriptions of several hundred characters; and generation itself ran
        outside the try block, leaving the risky step unprotected.
        """
        if not hasattr(self, 'meta_model'):
            return ""
        try:
            inputs = self.meta_tokenizer(text[:1024], return_tensors="pt", truncation=True)
            # ~3-4 chars per token on average: request a modest token budget,
            # then enforce the character limit on the decoded text.
            summary_ids = self.meta_model.generate(
                inputs["input_ids"],
                max_length=max(16, max_length // 3),
                min_length=10,
                length_penalty=2.0,
                num_beams=4,
                early_stopping=True
            )
            summary = self.meta_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            if len(summary) > max_length:
                # Trim at a word boundary so the description doesn't end mid-word.
                summary = summary[:max_length].rsplit(' ', 1)[0]
            return summary
        except Exception as e:
            logger.error(f"Meta gen failed: {e}")
            return "Could not generate meta description."

    def optimize_title(self, content, keyword):
        """Generate an SEO page title (<= 60 characters) targeting *keyword*.

        Returns "" when the title model failed to load.
        """
        if not hasattr(self, 'title_model'):
            return ""
        try:
            prompt = f"Write an SEO-optimized page title under 60 characters for content about '{keyword}': {content[:500]}"
            inputs = self.title_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            # generate()'s max_length is tokens; the 60-CHAR contract stated in
            # the prompt is enforced explicitly below.
            outputs = self.title_model.generate(inputs["input_ids"], max_length=60, num_beams=5)
            title = self.title_tokenizer.decode(outputs[0], skip_special_tokens=True)
            if len(title) > 60:
                title = title[:60].rsplit(' ', 1)[0]
            return title
        except Exception as e:
            logger.error(f"Title gen failed: {e}")
            return "Could not generate title."

    def analyze_page(self, url):
        """Fetch *url* and return keyword / title / meta suggestions.

        Returns a dict with url, extracted_keywords, suggested_title,
        suggested_meta_description and recommendations; on any failure
        (network, HTTP error, model error) returns {"error": str(e)}.
        """
        logger.info(f"Optimizing content for: {url}")
        try:
            response = requests.get(url, timeout=10)
            # Fix: without this, a 404/500 error page would be "optimized"
            # as if it were real content.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            content = soup.get_text(separator=' ', strip=True)
            keywords = self.extract_keywords(content)
            suggested_meta = self.generate_meta_description(content)
            # Detect main keyword (simple frequency for now, or use first extracted)
            main_keyword = keywords[0] if keywords else "general"
            suggested_title = self.optimize_title(content, main_keyword)
            recommendations = []
            if len(keywords) < 3:
                recommendations.append("Content might be too thin; few keywords detected.")
            return {
                "url": url,
                "extracted_keywords": keywords,
                "suggested_title": suggested_title,
                "suggested_meta_description": suggested_meta,
                "recommendations": recommendations
            }
        except Exception as e:
            logger.error(f"Content optimization failed: {e}")
            return {"error": str(e)}