# app.py — On-Demand SEO Content Generator (revision c0388eb)
import streamlit as st
import requests
from bs4 import BeautifulSoup, Comment
from googlesearch import search
from fake_useragent import UserAgent
from transformers import pipeline, AutoTokenizer
import torch
import time
import logging
import re
from retrying import retry
import gc
# --- Configuration ---
# Model Options (Ensure keys clearly indicate resource needs)
# Maps the human-readable label shown in the sidebar selectbox to the
# Hugging Face model ID passed to transformers.pipeline().
MODEL_OPTIONS = {
    # Lighter Models (More likely to work on free tiers)
    "Mistral-7B-Instruct (Fast, Med RAM)": "mistralai/Mistral-7B-Instruct-v0.2",
    "Gemma-7B-IT (Google, Med RAM)": "google/gemma-7b-it",
    "Phi-3-Mini-4k-Instruct (Microsoft, Small, Good)": "microsoft/Phi-3-mini-4k-instruct", # Requires trust_remote_code
    # Medium Models (May require upgraded tiers / more RAM/GPU)
    "Llama-3-8B-Instruct (Meta, High Quality, High RAM/GPU)": "meta-llama/Meta-Llama-3-8B-Instruct",
    "Phi-3-Medium-4k-Instruct (Microsoft, Strong, High RAM/GPU)": "microsoft/Phi-3-medium-4k-instruct", # Requires trust_remote_code
    "Qwen1.5-14B-Chat (Alibaba, Strong, High RAM/GPU)": "Qwen/Qwen1.5-14B-Chat",
    # Larger Models (Very likely require significant paid resources)
    "DeepSeek-Coder-V2-Instruct (DeepSeek, High RAM/GPU)": "deepseek-ai/DeepSeek-Coder-V2-Instruct", # Requires trust_remote_code
}
DEFAULT_MODEL_KEY = "Mistral-7B-Instruct (Fast, Med RAM)" # Start with a lighter default selection
# Scraping & Generation Defaults
DEFAULT_NUM_RESULTS = 4 # Reduced default slightly
REQUEST_TIMEOUT = 15  # seconds per HTTP request in fetch_url_content
MAX_COMPETITOR_TEXT_LENGTH = 5500  # max chars of competitor text fed into the prompt
DEFAULT_MAX_GENERATION_TOKENS = 2800  # default max_new_tokens for the LLM
# Retry settings (used by the `retrying` decorator on fetch_url_content)
RETRY_WAIT_FIXED = 2000  # ms between attempts
RETRY_STOP_MAX_ATTEMPT = 3  # total attempts per URL
# Tone & Audience Options (populate the sidebar selectboxes)
TONE_OPTIONS = ["Conversational", "Professional", "Authoritative", "Technical", "Friendly", "Engaging", "Educational", "Persuasive"]
AUDIENCE_OPTIONS = ["Beginners", "General Audience", "Experts", "Professionals (Specific Field)", "Customers", "Students", "Decision Makers"]
# --- Logging Setup ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(funcName)s] - %(message)s')
logger = logging.getLogger(__name__)
# --- State Management ---
# Seed every session-state key with its default exactly once per session so
# later code can read them without guarding against KeyError. Streamlit
# re-runs this script on every interaction; the `not in` check makes the
# assignment a no-op after the first run.
_STATE_DEFAULTS = {
    # Model-related state
    'current_model_pipeline': None,
    'current_model_id': "",
    # Data related state
    'scraped_urls': [],
    'competitor_analysis_text': "",
    'generated_content': "",
    'internal_link_suggestions': "",
    'last_keyword': "",
    'last_website_url': "",
    '_internal_last_scrape_keyword': "",
}
for _state_key, _default_value in _STATE_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _default_value
# --- Helper Functions ---
def clear_gpu_memory():
    """Attempts to clear GPU memory cache and run garbage collection."""
    logger.info("Attempting to clear GPU memory...")
    if not torch.cuda.is_available():
        # CPU-only environment: just drop the pipeline reference and collect garbage.
        logger.info("No GPU available, skipping memory clearing.")
        st.session_state.current_model_pipeline = None
        gc.collect()
        return
    try:
        # Drop the reference FIRST so the collector can actually free the model tensors.
        st.session_state.current_model_pipeline = None
        gc.collect()
        torch.cuda.empty_cache()  # ask PyTorch to return cached blocks to the driver
        gc.collect()
        logger.info("GPU memory cache cleared and garbage collected.")
        st.toast("Cleared GPU memory.", icon="🧹")
    except Exception as e:
        logger.error(f"Error clearing GPU memory: {e}", exc_info=True)
        st.toast(f"Error clearing GPU memory: {e}", icon="❌")
def reset_app_data():
    """Clears stored scraping and generation results, keeps model loaded."""
    st.session_state.scraped_urls = []
    # Every text-valued piece of run data resets to the empty string.
    for _text_key in ('competitor_analysis_text', 'generated_content',
                      'internal_link_suggestions', 'last_keyword',
                      '_internal_last_scrape_keyword'):
        st.session_state[_text_key] = ""
    logger.info("App data state reset (scraped/generated content).")
    st.toast("Cleared scraped data and generated content.", icon="🗑️")
# --- Model Loading (On Demand) ---
def load_model(model_id_to_load):
    """Loads the selected model into a text-generation pipeline, unloading any previous one.

    Args:
        model_id_to_load: Hugging Face model ID chosen in the sidebar.

    Returns:
        bool: True if the requested model ended up loaded, False on any failure.
    """
    # If the requested model is already loaded, do nothing.
    if st.session_state.get('current_model_id') == model_id_to_load and st.session_state.get('current_model_pipeline') is not None:
        logger.info(f"Model {model_id_to_load} is already loaded.")
        st.toast(f"{model_id_to_load} is already loaded.", icon="✅")
        return True
    # Unload previous model if one exists and is different.
    if st.session_state.get('current_model_pipeline') is not None:
        logger.info(f"Unloading previous model: {st.session_state.current_model_id}")
        st.toast(f"Unloading {st.session_state.current_model_id}...", icon="🧹")
        clear_gpu_memory()  # sets pipeline to None and clears the CUDA cache
        st.session_state.current_model_id = ""  # Clear model ID state
    # Load the new model.
    st.toast(f"Loading {model_id_to_load}... This may take time & RAM/GPU.", icon="⏳")
    logger.info(f"Attempting to load LLM pipeline for model: {model_id_to_load}")
    success = False
    try:
        # Prefer bf16 on GPUs that support it, fp16 on other GPUs, fp32 on CPU.
        dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16 if torch.cuda.is_available() else torch.float32
        logger.info(f"Using dtype: {dtype}")
        # Models that ship custom modeling code and therefore need trust_remote_code=True.
        trust_code_models = [
            "microsoft/Phi-3-mini-4k-instruct",
            "microsoft/Phi-3-medium-4k-instruct",
            "deepseek-ai/DeepSeek-Coder-V2-Instruct",
            # Add others if needed
        ]
        trust_code = model_id_to_load in trust_code_models
        logger.info(f"Trust remote code for {model_id_to_load}: {trust_code}")
        # Display spinner during the actual loading.
        with st.spinner(f"Loading {model_id_to_load} into memory..."):
            pipeline_instance = pipeline(
                "text-generation",
                model=model_id_to_load,
                trust_remote_code=trust_code,
                device_map="auto",
                torch_dtype=dtype,
            )
        # Some tokenizers ship without a pad token; fall back to EOS so
        # generation does not crash on padding.
        if pipeline_instance.tokenizer.pad_token_id is None:
            pipeline_instance.tokenizer.pad_token_id = pipeline_instance.tokenizer.eos_token_id
            if hasattr(pipeline_instance.model, 'config'):
                pipeline_instance.model.config.pad_token_id = pipeline_instance.tokenizer.eos_token_id
            logger.warning(f"Set pad_token_id to eos_token_id for {model_id_to_load}")
        logger.info(f"LLM pipeline loaded successfully for {model_id_to_load}.")
        st.session_state.current_model_pipeline = pipeline_instance
        st.session_state.current_model_id = model_id_to_load
        st.toast(f"Model {model_id_to_load} loaded!", icon="✅")
        success = True
    except ImportError as e:
        logger.error(f"ImportError loading {model_id_to_load}: {e}. Missing dependency?", exc_info=True)
        st.error(f"Load Error: Missing library for {model_id_to_load}? Check logs. Details: {e}")
    except Exception as e:
        logger.error(f"Failed to load {model_id_to_load}: {e}", exc_info=True)
        st.error(f"Failed to load {model_id_to_load}. Error: {e}. Check resource limits (RAM/GPU) & logs.")
        clear_gpu_memory()  # Attempt to clean up if loading failed
        st.session_state.current_model_id = ""  # Ensure state reflects failure
    # BUG FIX: this used to be `finally: return success`, which silently
    # swallows any in-flight exception (including KeyboardInterrupt/SystemExit).
    # Returning after the try/except preserves identical success semantics safely.
    return success
# --- User Agent Caching ---
@st.cache_resource
def get_user_agent():
    """Build (once per server process) a FakeUserAgent for randomized request headers.

    Returns None if initialization fails, which callers must check before scraping.
    """
    logger.info("Initializing FakeUserAgent.")
    fallback_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    try:
        return UserAgent(fallback=fallback_ua)
    except Exception as e:
        logger.error(f"Failed to initialize FakeUserAgent: {e}", exc_info=True)
        st.error(f"Could not initialize User Agent generator. Error: {e}")
        return None
# --- Core Functions (Scraping, Prompt Building, Generation Logic) ---
@retry(wait_fixed=RETRY_WAIT_FIXED, stop_max_attempt_number=RETRY_STOP_MAX_ATTEMPT,
       retry_on_exception=lambda e: isinstance(e, (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.exceptions.HTTPError)))
def fetch_url_content(url, headers):
    """Fetch a URL and return the Response, or None for non-HTML/oversized pages.

    The `retrying` decorator retries on timeouts, connection errors and HTTP
    errors, up to RETRY_STOP_MAX_ATTEMPT attempts, waiting RETRY_WAIT_FIXED ms
    between attempts.
    """
    # BUG FIX: the previous log line accessed `fetch_url_content.retry.attempt_number`,
    # an attribute that functions wrapped by the `retrying` decorator do not
    # expose — it raised AttributeError on every call. Log without the counter.
    logger.info(f"Fetching {url} (up to {RETRY_STOP_MAX_ATTEMPT} attempts)")
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    if 'text/html' not in response.headers.get('Content-Type', ''):
        logger.warning(f"Skipping URL {url} - Not HTML")
        return None
    if len(response.content) > 10 * 1024 * 1024:  # 10 MB limit: skip huge downloads
        logger.warning(f"Skipping URL {url} - Content too large")
        return None
    return response
def clean_text(text):
    """Normalize whitespace and strip boilerplate lines from scraped page text.

    A line survives only if it is at least 20 characters, has at least 3 words,
    and contains none of the common navigation/footer phrases. Surviving lines
    are returned joined by single newlines.
    """
    # Collapse runs of whitespace to one space, then squeeze repeated newlines.
    text = re.sub(r'\s{2,}', ' ', text)
    text = re.sub(r'\n+', '\n', text)
    min_line_length = 20
    min_words_per_line = 3
    skip_phrases = [
        'copyright ©', 'all rights reserved', 'privacy policy', 'terms of use', 'terms and conditions',
        'cookie policy', 'subscribe', 'sign up', 'log in', 'advertisement', 'share this', 'related posts',
        'leave a reply', 'comment', 'posted on', 'by author', 'tags:', 'categories:', 'follow us', 'read more',
        'click here', 'learn more', 'next article', 'previous article', 'you may also like', 'related topics'
    ]

    def _keep(line):
        # Substantial, boilerplate-free lines only.
        lowered = line.lower()
        return (len(line) >= min_line_length
                and len(line.split()) >= min_words_per_line
                and not any(phrase in lowered for phrase in skip_phrases))

    kept = [stripped for raw in text.split('\n') if _keep(stripped := raw.strip())]
    return '\n'.join(kept).strip()
def scrape_page_content(url, user_agent, scrape_status_ui):
    """Download one competitor URL and return its cleaned main-content text.

    Args:
        url: Page URL to scrape.
        user_agent: fake_useragent.UserAgent used to randomize the UA header.
        scrape_status_ui: Streamlit placeholder receiving per-URL status messages.

    Returns:
        Cleaned text of the page's main content, or "" on any failure or skip.
    """
    if not user_agent: logger.error("User Agent missing."); return ""
    # Browser-like headers reduce the chance of being blocked by the target site.
    headers = {
        'User-Agent': user_agent.random,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5', 'Referer': 'https://www.google.com/',
        'DNT': '1', 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1'
    }
    try:
        response = fetch_url_content(url, headers)
        if response is None: scrape_status_ui.warning(f"⚠️ Skip/Fail fetch: {url}", icon="🕸️"); return ""
        soup = BeautifulSoup(response.content, 'lxml')
        # Remove non-content elements (and HTML comments) before extracting text.
        tags_to_remove = ["script", "style", "nav", "footer", "aside", "form", "header", "noscript", "button", "input", "select", "textarea", "figure", "figcaption", "iframe", "svg", "path", "meta", "link"]
        for element in soup(tags_to_remove): element.decompose()
        for comment in soup.find_all(string=lambda text: isinstance(text, Comment)): comment.extract()
        # Prefer an explicit main/article container; fall back to common content
        # div class/id names, then to the whole <body>.
        main_content = (soup.find('main') or soup.find('article') or soup.find(role='main') or
                        soup.find('div', class_=re.compile(r'(content|main|body|post|entry|article)', re.I)) or
                        soup.find('div', id=re.compile(r'(content|main|body|post|entry|article)', re.I)))
        target_soup = main_content if main_content else soup.body
        if not target_soup: logger.warning(f"No body/main: {url}"); scrape_status_ui.warning(f"⚠️ No body/main: {url}", icon="🕸️"); return ""
        texts = target_soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'td', 'th', 'blockquote', 'span'])
        content_parts = []
        for elem in texts:
            # Defensive: skip anything still nested under a stripped-tag ancestor.
            if elem.find_parent(tags_to_remove): continue
            elem_text = elem.get_text(separator=' ', strip=True)
            if len(elem_text) > 10 and len(elem_text.split()) > 1:
                # Block-level elements get a trailing newline so clean_text can filter per line.
                if elem.name in ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'blockquote', 'tr', 'div']: # Added div for structure
                    content_parts.append(elem_text + "\n")
                else: content_parts.append(elem_text + " ")
        content = "".join(content_parts)
        cleaned_content = clean_text(content)
        if len(cleaned_content) < 150: logger.warning(f"Low content ({len(cleaned_content)} chars): {url}"); scrape_status_ui.warning(f"⚠️ Low content: {url}", icon="🕸️")
        else: logger.info(f"Scraped {len(cleaned_content)} chars: {url}"); scrape_status_ui.success(f"✅ Scraped: {url} ({len(cleaned_content)} chars)", icon="🕸️")
        time.sleep(0.6)  # polite delay between consecutive requests
        return cleaned_content
    except requests.exceptions.RequestException as e: logger.warning(f"Final scrape fail: {url}. Err: {e}"); scrape_status_ui.error(f"❌ Fail scrape: {url} ({e})", icon="🕸️"); return ""
    except Exception as e: logger.error(f"Unexpected scrape error: {url}: {e}", exc_info=True); scrape_status_ui.error(f"❌ Error scraping: {url} (Logs)", icon="🕸️"); return ""
def get_top_urls(keyword, num_results):
    """Return the top `num_results` Google result URLs for `keyword`, or [] on failure."""
    logger.info(f"Fetching top {num_results} URLs for keyword: '{keyword}'")
    try:
        urls = list(search(keyword, num_results=num_results, sleep_interval=2.5, lang="en", timeout=15))
    except Exception as e:
        error_message = str(e)
        logger.error(f"GSearch Error: {error_message}", exc_info=True)
        # Surface the most common failure modes with targeted messages.
        if "429" in error_message:
            st.error(f"❌ Google search blocked (429). WAIT before retrying.")
        elif "timed out" in error_message:
            st.error(f"❌ Google search timed out.")
        else:
            st.error(f"❌ GSearch Error: {error_message[:100]}...")
        return []
    logger.info(f"Found URLs: {urls}")
    if not urls:
        st.warning(f"⚠️ No Google search results found for '{keyword}'.")
        return []
    return urls
def build_content_generation_prompt(keyword, competitor_texts, tone, audience, model_id):
    """Assemble the chat messages (system + user) for the main article generation.

    Competitor text is truncated to MAX_COMPETITOR_TEXT_LENGTH characters so the
    prompt stays within the model's context budget.
    """
    logger.info(f"Build content gen prompt. Tone: {tone}, Audience: {audience}. Comp length: {len(competitor_texts)}")
    if len(competitor_texts) > MAX_COMPETITOR_TEXT_LENGTH:
        logger.warning(f"Comp text truncated.")
        competitor_summary = competitor_texts[:MAX_COMPETITOR_TEXT_LENGTH] + "... [Truncated]"
    else:
        competitor_summary = competitor_texts
    system_prompt = f"""You are an expert SEO Content Strategist & world-class Copywriter. Task: Analyze competitor text & generate a significantly superior, comprehensive, user-first article for keyword '{keyword}', targeting '{audience}' audience with '{tone}' tone. Focus on quality, depth, clarity, fulfilling user intent better than competition."""
    user_prompt = f"""**Keyword:** "{keyword}"
**Audience:** {audience}
**Tone:** {tone}
**Objective:** Generate exceptional, SEO-optimized article for "{keyword}" designed to outperform top content via superior value, insights, UX.
**Competitor Analysis Context (Analyze for topics, depth, strengths, WEAKNESSES/GAPS):**
--- BEGIN COMPETITOR ---
{competitor_summary}
--- END COMPETITOR ---
**Content Gen Instructions:**
1. **Value & Depth:** Be demonstrably better. Deeper, clearer, actionable advice, unique perspectives/data, fill gaps. Address user intent exhaustively.
2. **User-First & Humanized:** Write for '{audience}' in '{tone}'. Clear, concise, short paras, varied sentences, engaging Qs. Logical flow, readable.
3. **Structure (Strict Markdown):** Compelling H2 Title. Engaging Intro (50-150 words): Hook, purpose/value, outline. Logical Sections (H2)/Sub-sections (H3): Descriptive, keyword-aware headings. Readability: Bullets (`* `), Numbered lists (`1. `), **Bold** (strategic). Comprehensive Body: Expand beyond competitors. Strong Conclusion: Summarize takeaways, final insight/CTA.
4. **SEO (Natural):** Weave "{keyword}" & LSI terms into title, headings, intro, body, conclusion. Prioritize relevance/clarity over density. NO keyword stuffing.
5. **Originality & Credibility:** 100% unique. Use comp text ONLY for analysis. NO plagiarism. Factual accuracy.
6. **Negative Constraints:** DO NOT: Rehash competitors; use preambles/sign-offs; use excessive jargon (unless 'Experts'); write long paragraphs; stuff keywords; invent facts.
**Output:** ONLY the Markdown article, starting with H2 title."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    logger.info(f"Content prompt done for {model_id}.")
    return messages
def build_internal_link_prompt(generated_content, keyword, website_url):
    """Assemble chat messages asking the LLM for internal-link anchor suggestions.

    Only the first ~8000 characters of the article are included to bound prompt size.
    """
    logger.info(f"Build internal link prompt for URL: {website_url}")
    system_prompt = "You are an SEO assistant specialized in identifying internal linking opportunities."
    user_prompt = f"""**Website Base URL:** {website_url}
**Main Topic of Article:** "{keyword}"
**Task:** Review the article below. Identify 3-5 phrases/sentences for internal links relevant to {website_url}.
**For each opportunity, provide:**
1. Exact anchor text phrase/sentence from article.
2. Brief description of the *type* of relevant content needed (e.g., "detailed guide on [sub-topic]", "service page for [service]").
**IMPORTANT:** Do NOT invent URLs. Describe the *type* of page. Choose natural anchor text. Focus on value. Format as Markdown numbered list.
**Article Content (Analyze first ~8000 chars):**
--- BEGIN ARTICLE ---
{generated_content[:8000]}
--- END ARTICLE ---"""
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
def run_llm_generation(pipe, messages, max_tokens):
    """Run the chat `messages` through the loaded pipeline and return the assistant text.

    Args:
        pipe: Loaded transformers text-generation pipeline (may be None).
        messages: Chat-format list of {"role", "content"} dicts.
        max_tokens: Cap on newly generated tokens.

    Returns:
        The extracted assistant response string, or None on any failure.
    """
    if pipe is None: st.error("❌ LLM Pipeline missing."); return None
    model_id = pipe.model.name_or_path
    logger.info(f"Running generation: {model_id}. Max tokens: {max_tokens}.")
    start_time = time.time()
    try:
        # Sampling settings; pad/eos pinned to the tokenizer's EOS token.
        gen_args = {"max_new_tokens": max_tokens, "temperature": 0.7, "top_p": 0.95, "top_k": 40,
                    "do_sample": True, "pad_token_id": pipe.tokenizer.eos_token_id, "eos_token_id": pipe.tokenizer.eos_token_id}
        logger.info(f"Gen args: {gen_args}")
        results = pipe(messages, **gen_args)
        # --- Robust Extraction ---
        # Pipelines may return either the chat list (assistant turn appended) or a
        # flat string containing prompt + completion; both shapes are handled below.
        assistant_response = None
        if results and results[0] and 'generated_text' in results[0]:
            output_data = results[0]['generated_text']
            if isinstance(output_data, list): assistant_message = next((msg['content'] for msg in reversed(output_data) if msg['role'] == 'assistant'), None); assistant_response = assistant_message
            elif isinstance(output_data, str):
                # Flat string: cut everything up to (and including) the last user prompt.
                last_prompt_content = messages[-1]['content']
                last_prompt_index = output_data.rfind(last_prompt_content)
                if last_prompt_index != -1: potential_response = output_data[last_prompt_index + len(last_prompt_content):].strip()
                else: potential_response = output_data
                # Strip any leading role markers / special tokens the model echoed.
                assistant_response = re.sub(r"^(assistant|ASSISTANT|</s>|<\|im_end\|>|<\|assistant\|>)\s*[:\n]*", "", potential_response, flags=re.IGNORECASE | re.DOTALL).strip()
            else: logger.error(f"Unexpected output format: {type(output_data)}")
        else: logger.error(f"Unexpected LLM output structure: {results}")
        # --- Validation ---
        if assistant_response:
            duration = time.time() - start_time; logger.info(f"Gen success ({model_id}) {duration:.2f}s. Len: {len(assistant_response)}.")
            # Remove a wrapping ```markdown code fence if the model added one.
            assistant_response = re.sub(r"^```markdown\n", "", assistant_response).strip(); assistant_response = re.sub(r"\n```$", "", assistant_response).strip()
            if len(assistant_response) < 30: logger.warning(f"Gen output very short ({len(assistant_response)})."); st.warning("⚠️ Gen output very short.")
            return assistant_response
        else: logger.error(f"Failed parse assistant response. Output: {results}"); st.error("❌ Failed parse LLM response. Check logs."); return None
    except torch.cuda.OutOfMemoryError: logger.error(f"OOM Error ({model_id})!", exc_info=True); st.error(f"❌ OOM Error ({model_id}). Try smaller model/less tokens/restart."); clear_gpu_memory(); return None
    except Exception as e: logger.error(f"Unhandled gen error ({model_id}): {e}", exc_info=True); st.error(f"❌ Unexpected gen error: {e}"); return None
# --- Streamlit App UI ---
st.set_page_config(layout="wide", page_title="On-Demand SEO Content Gen")
# --- Sidebar: model selection/loading, generation settings, linking, actions ---
with st.sidebar:
    st.header("⚙️ Configuration")
    # Model Selection & Loading Area
    st.subheader("1. Select & Load Model")
    selected_model_key = st.selectbox(
        "Choose Language Model:",
        options=list(MODEL_OPTIONS.keys()),
        index=list(MODEL_OPTIONS.keys()).index(DEFAULT_MODEL_KEY),
        key="model_selector", # Key for potential state access
        help="Choose AI model. Performance & resources vary. Load required."
    )
    selected_model_id = MODEL_OPTIONS[selected_model_key]
    # Display current status and load button
    load_button_placeholder = st.empty() # Placeholder for dynamic button text/state
    model_status_placeholder = st.empty() # Placeholder for status message
    if st.session_state.get('current_model_id') == selected_model_id and st.session_state.get('current_model_pipeline') is not None:
        # The selected model is the one currently loaded.
        model_status_placeholder.success(f"✅ Loaded: `{selected_model_id}`")
        load_button_text = f"Switch from {selected_model_key}" # Or "Reload"
    elif st.session_state.get('current_model_pipeline') is not None:
        # Some other model is loaded; offer to swap.
        model_status_placeholder.warning(f"⚠️ Loaded: `{st.session_state.current_model_id}`\nSelected: `{selected_model_id}`")
        load_button_text = f"Unload Current & Load {selected_model_key}"
    else:
        model_status_placeholder.info("ℹ️ No model loaded.")
        load_button_text = f"Load Selected: {selected_model_key}"
    if load_button_placeholder.button(load_button_text, key="load_model"):
        load_model(selected_model_id)
        # Rerun to update status placeholders immediately after load attempt
        st.rerun()
    st.markdown("---")
    # Content Settings
    st.subheader("2. Content Settings")
    with st.expander("Adjust Content Parameters", expanded=False):
        num_results = st.slider("Competitors to Analyze:", min_value=1, max_value=8, value=DEFAULT_NUM_RESULTS, step=1)
        selected_tone = st.selectbox("Content Tone:", options=TONE_OPTIONS, index=TONE_OPTIONS.index("Engaging"))
        selected_audience = st.selectbox("Target Audience:", options=AUDIENCE_OPTIONS, index=AUDIENCE_OPTIONS.index("General Audience"))
        max_gen_tokens = st.number_input("Max Generation Tokens:", min_value=500, max_value=8192, value=DEFAULT_MAX_GENERATION_TOKENS, step=100)
    # Internal Linking
    st.subheader("3. Internal Linking (Optional)")
    with st.expander("Configure Link Suggestions", expanded=False):
        website_url = st.text_input("Your Website URL:", placeholder="https://www.example.com", value=st.session_state.get("last_website_url", ""), key="website_url_input")
        # Update state immediately on change if needed, or just read before use
        st.session_state.last_website_url = website_url
    st.markdown("---")
    st.header("ℹ️ App Info & Actions")
    st.info(f"""
- **Status:** {'Model Loaded' if st.session_state.current_model_pipeline else 'No Model Loaded'}
- **Competitors:** Top {num_results}
- **Max Generation:** ~{max_gen_tokens} tokens
""")
    st.warning("""
- **Load Model First:** Select a model and click 'Load' before generating.
- **Resource Use:** Models need significant RAM/GPU. Loading WILL fail if resources are insufficient.
- **Review Output:** AI provides drafts. ALWAYS review, edit, fact-check.
""")
    if st.button("Clear Scraped/Generated Data", key="clear_data"):
        reset_app_data()
# --- Main App Area: keyword input and the generate trigger ---
st.title("✨ On-Demand SEO Content Generator ✨")
st.markdown(f"Load your chosen AI model, then generate SEO-focused content.")
# User Input Area
st.subheader("Keyword & Generation")
keyword = st.text_input("Enter Primary Target Keyword:", placeholder="e.g., vertical hydroponics guide", value=st.session_state.get("last_keyword", ""), key="keyword_input")
# Disable button if model not loaded
generate_button_disabled = st.session_state.current_model_pipeline is None
generate_button_help = "Load a model from the sidebar first." if generate_button_disabled else "Analyze competitors and generate article."
analyze_button = st.button(
    "Analyze Competitors & Generate Content",
    type="primary",
    key="generate_button",
    disabled=generate_button_disabled,
    help=generate_button_help
)
st.markdown("---")
# --- Main Workflow Triggered by Button ---
# Step 0 validates preconditions, Step 1 scrapes competitors (reusing cached
# results when the keyword is unchanged), Step 2 runs the LLM generation.
if analyze_button:
    # Double check model is loaded (though button should be disabled)
    if not st.session_state.current_model_pipeline:
        st.error("❌ Cannot generate: No model loaded. Please use the sidebar.")
        st.stop()
    if not keyword:
        st.warning("⚠️ Please enter a keyword.")
        st.stop()
    st.session_state.last_keyword = keyword # Store keyword for potential reuse
    ua = get_user_agent() # Ensure user agent is ready
    if not ua: st.error("❌ User Agent failed. Cannot scrape."); st.stop()
    # Reset previous generation results for this run
    st.session_state.generated_content = ""
    st.session_state.internal_link_suggestions = ""
    # --- Step 1: Scrape Competitors (with status updates) ---
    # Re-scrape only when the keyword changed or no analysis text is cached.
    if keyword != st.session_state.get('_internal_last_scrape_keyword', None) or not st.session_state.competitor_analysis_text:
        logger.info(f"Scraping needed for '{keyword}'.")
        st.session_state.competitor_analysis_text = "" # Clear old text
        st.session_state.scraped_urls = []
        st.session_state['_internal_last_scrape_keyword'] = "" # Reset marker until success
        scrape_container = st.container()
        with scrape_container:
            st.info(f"🕸️ Fetching URLs and Scraping Top {num_results} Competitors...")
            progress_text = "Scraping progress..."
            scrape_progress_bar = st.progress(0, text=progress_text)
            status_area = st.container() # Use container for multiple status lines
            urls = get_top_urls(keyword, num_results)
            st.session_state.scraped_urls = urls
            if urls:
                all_texts = []
                scraped_count = 0
                for i, url in enumerate(urls):
                    with status_area: # Show status within the designated area
                        scrape_status_ui = st.empty() # Placeholder for single URL status
                        content = scrape_page_content(url, ua, scrape_status_ui)
                        if content:
                            all_texts.append(content)
                            scraped_count += 1
                    scrape_progress_bar.progress((i + 1) / len(urls), text=f"Processed URL {i+1}/{len(urls)}...")
                    time.sleep(0.1) # UI update breather
                # Join per-page texts with an explicit separator the prompt can reference.
                st.session_state.competitor_analysis_text = "\n\n --- ARTICLE SEPARATOR --- \n\n".join(all_texts)
                st.session_state['_internal_last_scrape_keyword'] = keyword # Mark scrape success for this keyword
                if st.session_state.competitor_analysis_text:
                    scrape_container.success(f"✅ Scraped {scraped_count}/{len(urls)} pages. Analysis text: {len(st.session_state.competitor_analysis_text)} chars.")
                else:
                    scrape_container.error("❌ Failed to scrape sufficient content. Cannot generate article.")
                    st.stop()
            else:
                scrape_container.error("❌ Could not retrieve competitor URLs. Cannot proceed.")
                st.stop()
    else:
        st.success(f"✔️ Using previously scraped data for '{keyword}'. ({len(st.session_state.competitor_analysis_text)} chars).")
    # --- Step 2: Generate Main Content ---
    st.info(f"✍️ Generating Content with {st.session_state.current_model_id}...")
    generation_status = st.status("Sending request to LLM...")
    with generation_status:
        st.write(f"**Tone:** {selected_tone}, **Audience:** {selected_audience}, **Max Tokens:** {max_gen_tokens}")
        gen_prompt = build_content_generation_prompt(
            keyword, st.session_state.competitor_analysis_text, selected_tone, selected_audience, st.session_state.current_model_id
        )
        generated_content = run_llm_generation(st.session_state.current_model_pipeline, gen_prompt, max_gen_tokens)
        st.session_state.generated_content = generated_content
        if generated_content:
            generation_status.update(label="✅ Content Generation Complete!", state="complete")
        else:
            generation_status.update(label="❌ Content Generation Failed.", state="error")
            st.stop() # Stop if main content fails
# --- Display Outputs (Outside the button click conditional) ---
# Runs on every rerun so results persist after interaction with other widgets.
if st.session_state.generated_content:
    st.markdown("---")
    st.subheader("📝 Generated SEO Content")
    st.markdown(st.session_state.generated_content)
    st.text_area("Copyable Markdown:", st.session_state.generated_content, height=400, key="generated_content_area_display")
    # --- Internal Linking Section ---
    if st.session_state.last_website_url: # Only show if URL was provided
        st.markdown("---")
        st.subheader("🔗 Internal Linking Suggestions")
        if st.button("Suggest Internal Links", key="suggest_links_button_display"):
            link_status = st.status(f"Analyzing content for link opportunities ({st.session_state.current_model_id})...")
            with link_status:
                st.write(f"Website context: {st.session_state.last_website_url}")
                link_prompt = build_internal_link_prompt(st.session_state.generated_content, keyword, st.session_state.last_website_url)
                link_suggestions = run_llm_generation(st.session_state.current_model_pipeline, link_prompt, max_tokens=500) # Use fewer tokens
                st.session_state.internal_link_suggestions = link_suggestions
                if link_suggestions: link_status.update(label="✅ Link suggestions generated!", state="complete")
                else: link_status.update(label="❌ Failed to generate link suggestions.", state="error")
        # Display suggestions if they exist in state
        if st.session_state.internal_link_suggestions:
            st.markdown(st.session_state.internal_link_suggestions)
            st.info("ℹ️ AI suggestions only. Verify relevance and find actual URLs on your site.")
    else:
        st.markdown("---")
        st.info("Provide your website URL in the sidebar to enable internal link suggestions after generating content.")