import logging

import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util
from transformers import (
    pipeline,
    BartForConditionalGeneration,
    BartTokenizer,
    T5ForConditionalGeneration,
    T5Tokenizer,
)

logger = logging.getLogger(__name__)


class ContentOptimizationAgent:
    """Suggest SEO improvements (keywords, title, meta description) for a page.

    The constructor tries to load four models: a keyphrase-extraction
    pipeline, a BART summarizer, a sentence-embedding model and a FLAN-T5
    generator. Loading is best-effort: if it fails, the error is logged and
    the attributes that were not reached stay unset. Each public method
    checks for the attributes it needs and returns an empty result when they
    are missing.
    """

    def __init__(self):
        """Load all models; log (do not raise) if any of them fails to load."""
        try:
            self.keyword_extractor = pipeline(
                "token-classification",
                model="ml6team/keyphrase-extraction-distilbert-inspec",
                aggregation_strategy="simple",
            )
            self.meta_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
            self.meta_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
            self.semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
            # Load T5 only if needed to save memory, or here if sufficient RAM (Space has 16GB)
            self.title_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
            self.title_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
        except Exception as e:
            # Attributes assigned before the failure remain usable; the rest
            # stay unset and are detected by _has_components().
            logger.error("Failed to load models for Content Optimizer: %s", e)

    def _has_components(self, *names):
        """Return True when every attribute in *names* is set and not None."""
        return all(getattr(self, name, None) is not None for name in names)

    def extract_keywords(self, text):
        """Return the distinct keyphrases found in the start of *text*.

        Only the first 512 characters are analysed and only phrases scoring
        above 0.7 are kept. Duplicates are removed while preserving the order
        in which the extractor reported them, so the first element is stable
        across runs.

        Returns an empty list when the extractor is unavailable or *text* is
        empty.
        """
        if not text or not self._has_components("keyword_extractor"):
            return []
        results = self.keyword_extractor(text[:512])  # Limit for speed
        keywords = [r['word'] for r in results if r['score'] > 0.7]
        # dict.fromkeys de-duplicates without losing order (a set would not).
        return list(dict.fromkeys(keywords))

    def generate_meta_description(self, text, max_length=160):
        """Summarise *text* into a meta description.

        Args:
            text: Page text; only the first 1024 characters are used.
            max_length: Forwarded to ``generate()`` as ``max_length``.
                NOTE(review): that limit is counted in tokens, so the decoded
                string can be longer than this many characters - confirm
                whether a character cap is wanted.

        Returns:
            The generated summary, ``""`` when the model/tokenizer are
            unavailable or *text* is empty, or a fixed fallback message when
            generation fails.
        """
        if not text or not self._has_components("meta_model", "meta_tokenizer"):
            return ""
        try:
            inputs = self.meta_tokenizer(text[:1024], return_tensors="pt", truncation=True)
            summary_ids = self.meta_model.generate(
                inputs["input_ids"],
                max_length=max_length,
                # Never ask for a minimum longer than the caller's maximum.
                min_length=min(100, max_length),
                length_penalty=2.0,
                num_beams=4,
                early_stopping=True,
            )
            return self.meta_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        except Exception as e:
            logger.error("Meta gen failed: %s", e)
            return "Could not generate meta description."

    def optimize_title(self, content, keyword):
        """Generate a page title for *content* focused on *keyword*.

        The prompt uses the first 500 characters of *content*.

        Returns:
            The generated title, ``""`` when the model/tokenizer are
            unavailable, or a fixed fallback message when generation fails.
        """
        if not self._has_components("title_model", "title_tokenizer"):
            return ""
        try:
            prompt = f"Write an SEO-optimized page title under 60 characters for content about '{keyword}': {content[:500]}"
            inputs = self.title_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            outputs = self.title_model.generate(inputs["input_ids"], max_length=60, num_beams=5)
            return self.title_tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            logger.error("Title gen failed: %s", e)
            return "Could not generate title."

    def analyze_page(self, url):
        """Fetch *url* and return keyword, title and meta suggestions.

        Returns:
            A dict with the keys ``url``, ``extracted_keywords``,
            ``suggested_title``, ``suggested_meta_description`` and
            ``recommendations``; or ``{"error": <message>}`` if fetching or
            processing the page raises (including HTTP error statuses).
        """
        logger.info("Optimizing content for: %s", url)
        try:
            response = requests.get(url, timeout=10)
            # Do not analyse the body of a 4xx/5xx error page as real content.
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')
            # Script and style bodies are not visible page text; left in,
            # they would fill the short prefixes the models are given.
            for tag in soup(["script", "style"]):
                tag.decompose()
            content = soup.get_text(separator=' ', strip=True)

            keywords = self.extract_keywords(content)
            suggested_meta = self.generate_meta_description(content)

            # Detect main keyword (simple frequency for now, or use first extracted)
            main_keyword = keywords[0] if keywords else "general"
            suggested_title = self.optimize_title(content, main_keyword)

            recommendations = []
            if len(keywords) < 3:
                recommendations.append("Content might be too thin; few keywords detected.")

            return {
                "url": url,
                "extracted_keywords": keywords,
                "suggested_title": suggested_title,
                "suggested_meta_description": suggested_meta,
                "recommendations": recommendations,
            }
        except Exception as e:
            logger.error("Content optimization failed: %s", e)
            return {"error": str(e)}