"""
Dev flow orchestration script for content scraping and email delivery.

This module coordinates the dev workflow:
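1. Scrape article content (or load it from the scrape cache)
2. Generate SEO metadata (keywords and tags), then rewrite the title and body
3. Fetch featured image
4. Generate HTML webpage
5. Send HTML email to recipient (currently disabled in process_article_dev)
6. Log a summary and record the result in the status tracker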
"""

import sys
import os
from logger import logger
from config import (
    validate_config,
    DEV_EMAIL_RECIPIENT,
    DEV_CACHE_CONTENT,
    DEV_CACHE_SEO,
    DEV_CACHE_IMAGE,
)
from index_manager import IndexManager
from scraper import ArticleScraper
from content_processor import ContentProcessor
from image_handler import ImageHandler
from html_handler import HTMLHandler
from email_handler import EmailHandler
from status_tracker import StatusTracker


def get_or_scrape_content(url):
    """
    Return article content for a URL, preferring the scrape cache.

    A cached entry is returned as-is only when it contains a 'sources' key.
    Otherwise (no entry, empty entry, or an old-format entry without
    'sources') the article is scraped again and the result is written back
    to the cache before being returned.

    Args:
        url (str): Article URL

    Returns:
        dict: The cached entry or the fresh scraper result. The fresh result
            is read for 'title', 'body' and (optionally) 'sources'.

    Raises:
        Exception: Any error from the cache or the scraper is logged and
            re-raised unchanged.
    """
    try:
        cache = IndexManager()

        if cache.url_exists(url):
            logger.info(f"URL found in cache: {url}")
            cached = cache.load_scraped_data(url)
            if cached:
                if "sources" in cached:
                    return cached
                # Entry exists but predates the 'sources' field: fall through
                # and scrape again so the cache gets upgraded.
                logger.info("Cached entry is missing 'sources' key (old format) — re-scraping to refresh")

        logger.info(f"Scraping fresh content: {url}")
        fresh = ArticleScraper().scrape(url)

        # Persist the fresh scrape so the next run can reuse it.
        cache.save_scraped_data(
            url,
            fresh["title"],
            fresh["body"],
            sources=fresh.get("sources", []),
        )
        return fresh

    except Exception as err:
        logger.error(f"Failed to get/scrape content: {err}")
        raise


def _extract_seo_terms(seo_data):
    """
    Pull the keyword list and the tag list out of an SEO metadata dict.

    Args:
        seo_data (dict): SEO metadata. The 'focus_keyword',
            'secondary_keywords' and 'tags' keys are read; all are treated
            as optional.

    Returns:
        tuple: ``(focus_words, tags)``. ``focus_words`` holds the focus
            keyword first, then the secondary keywords, with empty entries
            removed so that an empty list really means "no keywords".
    """
    keywords = [seo_data.get("focus_keyword")]
    keywords.extend(seo_data.get("secondary_keywords") or [])
    focus_words = [word for word in keywords if word]
    tags = seo_data.get("tags") or []
    return focus_words, tags


def _build_featured_image_html(image_info, img_alt):
    """
    Build the featured-image ``<figure>`` plus photo-credit paragraph.

    Args:
        image_info (dict): Must contain 'url'; 'credit' and 'credit_url'
            are optional.
        img_alt (str): Alternative text for the image.

    Returns:
        str: HTML fragment. Every interpolated value is HTML-escaped because
            it originates outside this module (image metadata, generated
            keywords) and is placed inside attributes or text nodes.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    image_url = escape(str(image_info["url"]), quote=True)
    alt_text = escape(str(img_alt), quote=True)
    credit_name = image_info.get("credit", "")
    credit_url = image_info.get("credit_url", "https://unsplash.com")

    if credit_name:
        photo_credit_html = (
            f'<p style="font-size:0.75rem;color:#888888;text-align:left;margin-top:0.25rem;padding-left:0;">'
            f'Photo by <a href="{escape(str(credit_url), quote=True)}" target="_blank" rel="noopener">'
            f'{escape(str(credit_name))}</a> on '
            f'<a href="https://unsplash.com" target="_blank" rel="noopener">Unsplash</a>'
            f'</p>'
        )
    else:
        photo_credit_html = (
            '<p style="font-size:0.75rem;color:#888888;text-align:left;margin-top:0.25rem;padding-left:0;">'
            'Photo via <a href="https://unsplash.com" target="_blank" rel="noopener">Unsplash</a>'
            '</p>'
        )

    return (
        f'<figure style="margin:0;padding:0;width:100%;">'
        f'<img src="{image_url}" alt="{alt_text}" '
        f'style="width:100%;height:240px;object-fit:cover;object-position:center;display:block;">'
        f'</figure>'
        + photo_credit_html
    )


def process_article_dev(url, email_recipient=None):
    """
    Dev workflow to process an article and generate its HTML page.

    Steps: retrieve content, generate SEO data and rewrite the content,
    fetch a featured image, generate the HTML file, then log a summary and
    record the outcome in the status tracker. The email step is currently
    disabled (kept below as commented-out code).

    Args:
        url (str): Article URL to process
        email_recipient (str, optional): Email recipient. Defaults to config
            value. Only used by the disabled email step.

    Returns:
        dict: 'title' (rewritten title), 'tags' and 'html_file' (path
            returned by the HTML handler).

    Raises:
        Exception: If any step fails. The failure is recorded through the
            status tracker (best effort) and the original exception is
            re-raised.
    """
    if email_recipient is None:
        email_recipient = DEV_EMAIL_RECIPIENT

    status_tracker = StatusTracker("dev")

    # Initialised before the try block so the error handler can read them
    # no matter how far the flow got.
    article_data = None
    # Label of the numbered step currently running; None while executing
    # work that does not belong to a numbered step.
    current_step = None

    try:
        logger.info("=" * 70)
        logger.info(f"Starting DEV flow - article processing: {url}")
        logger.info("=" * 70)

        status_tracker.mark_in_progress(url)

        # Step 1: Get or scrape content
        current_step = "step_1_scrape"
        logger.info("\n[1/6] Retrieving article content...")
        article_data = get_or_scrape_content(url)
        original_title = article_data["title"]
        original_body = article_data["body"]
        logger.info(f"✓ Content retrieved: {original_title} and size is {len(original_body)} characters")
        logger.debug(f"Original body: {original_body[:300000]}")

        # Step 2 covers SEO generation and the rewrite; the dev cache holds
        # earlier results of both, so loading it is attributed to step 2.
        current_step = "step_2_rewrite"
        index_manager = IndexManager()
        dev_cache = index_manager.load_dev_cache(url) or {}
        # Cached content is only usable when both title and body are present,
        # since both are read below.
        cached_content = bool(DEV_CACHE_CONTENT and dev_cache.get("title") and dev_cache.get("body"))
        cached_seo = bool(DEV_CACHE_SEO and dev_cache.get("seo_data"))
        cached_image = bool(DEV_CACHE_IMAGE and dev_cache.get("image_info"))

        # Step 2: Generate SEO data first
        processor = None
        if cached_seo:
            logger.info("\n[2/6] Loading cached SEO data...")
            seo_data = dev_cache["seo_data"]
            seo_focus_words, tags = _extract_seo_terms(seo_data)
            logger.info(f"✓ Reused cached SEO: focus_keyword='{seo_data.get('focus_keyword')}', {len(tags)} tags")
        else:
            logger.info("\n[2/6] Generating SEO metadata with AI...")
            processor = ContentProcessor()
            seo_data = processor.generate_seo_data(original_title, original_body)
            seo_focus_words, tags = _extract_seo_terms(seo_data)
            logger.info(f"✓ SEO generated: focus_keyword='{seo_data.get('focus_keyword')}', {len(tags)} tags")

        # Step 2b: Rewrite content (title + body)
        if cached_content:
            logger.info("\n[2b/6] Loading cached content (title + body)...")
            new_title = dev_cache["title"]
            new_body = dev_cache["body"]
            logger.info(f"✓ Reused cached content: {new_title}")
        else:
            logger.info("\n[2b/6] Rewriting content with AI...")
            if processor is None:
                processor = ContentProcessor()
            processed_data = processor.rewrite_content(
                original_title,
                original_body,
                url,
                seo_focus_words,
                sources=article_data.get("sources", []),
            )
            new_title = processed_data["title"]
            new_body = processed_data["body"]
            logger.info(f"✓ Content rewritten: {new_title}")

        # Step 3: Fetch featured image
        current_step = "step_3_image"
        image_handler = ImageHandler()
        if cached_image:
            logger.info("\n[3/6] Reusing cached featured image for dev flow...")
            image_info = dev_cache["image_info"]
            # Return value is not needed here; the call is kept as in the
            # original flow (presumably it validates/stores the image —
            # TODO confirm against ImageHandler).
            image_handler.download_image(image_info["url"])
            logger.info(f"✓ Reused image: {image_info.get('url')}")
        else:
            logger.info("\n[3/6] Fetching featured image...")
            image_info = image_handler.fetch_image(tags)
            image_handler.download_image(image_info["url"])
            # 'credit' is optional everywhere else, so do not fail on it here.
            logger.info(f"✓ Image fetched from {image_info.get('credit', '')}")

        # Saving the dev cache is not one of the numbered steps.
        current_step = None
        if DEV_CACHE_CONTENT or DEV_CACHE_SEO or DEV_CACHE_IMAGE:
            index_manager.save_dev_cache(
                url,
                title=new_title,
                body=new_body,
                tags=tags,
                image_info=image_info,
                seo_data=seo_data,
            )

        # Step 4: Generate HTML webpage
        current_step = "step_4_html"
        logger.info("\n[4/6] Generating HTML webpage...")
        html_handler = HTMLHandler()

        # Alt text: first SEO keyword, falling back to the title when no
        # keyword is available.
        img_alt = seo_focus_words[0] if seo_focus_words else new_title.replace('"', "")
        featured_image_html = _build_featured_image_html(image_info, img_alt)
        enhanced_body = (
            featured_image_html
            + '\n<div style="max-width:100%;overflow-x:hidden;box-sizing:border-box;word-break:break-word;overflow-wrap:break-word;">'
            + new_body
            + "</div>"
        )

        # Generate and save HTML
        html_file_path = html_handler.generate_and_save(
            title=new_title,
            body=enhanced_body,
            tags=tags,
            image_url=image_info["url"],
            original_url=url,
        )
        logger.info(f"✓ HTML file generated: {html_file_path}")

        # Extract filename from path
        html_filename = os.path.basename(html_file_path)

        # Everything after HTML generation is bookkeeping, not a numbered step.
        current_step = None

        # NOTE: Step 5 (email) is intentionally disabled; the original code
        # is kept below for when it is re-enabled.
        # # Step 5: Send HTML email
        # logger.info("\n[5/6] Saving email file...")
        # try:
        #     email_handler = EmailHandler()

        #     # Read HTML content from file
        #     with open(html_file_path, "r", encoding="utf-8") as f:
        #         html_content = f.read()

        #     # Save email file
        #     email_file_path = email_handler.save_article_email(
        #         recipient=email_recipient,
        #         article_title=new_title,
        #         html_content=html_content,
        #     )
        #     logger.info(f"✓ Email saved to {email_file_path}")

        # except ValueError as e:
        #     logger.warning(f"Email not configured: {e}")
        #     logger.info("Skipping email step - configure EMAIL_SENDER to enable")
        #     email_file_path = None
        # except (ConnectionRefusedError, OSError) as e:
        #     logger.warning(f"Mail server not available: {e}")
        #     logger.info("Skipping email step - mail server may not be running. Install Postfix: sudo apt install postfix")
        # except Exception as e:
        #     logger.warning(f"Failed to send email (continuing anyway): {e}")

        # Step 6: Summary
        logger.info("\n[6/6] Processing complete!")
        logger.info("=" * 70)
        logger.info("✅ SUCCESS - DEV flow completed")
        logger.info(f"   Title: {new_title}")
        logger.info(f"   Tags: {', '.join(tags)}")
        logger.info(f"   HTML File: {html_file_path}")
        logger.info("=" * 70 + "\n")

        # Mark as successful
        status_tracker.mark_successful(
            url=url,
            filename=html_filename,
            title=original_title,
            new_title=new_title,
        )

        return {
            "title": new_title,
            "tags": tags,
            "html_file": html_file_path,
        }

    except Exception as e:
        logger.error("=" * 70)
        logger.error(f"❌ FAILED - DEV flow failed: {e}")
        logger.error("=" * 70)

        # Use the explicitly tracked step. Matching on the exception text
        # cannot work because the step banners are log messages, not part of
        # the exception. Outside a numbered step, keep the old
        # "unknown_error" label with a message excerpt.
        failure_step = current_step or f"unknown_error: {str(e)[:50]}"

        # Mark as failed (best effort: a tracker problem must not mask the
        # original exception, but it should not vanish silently either).
        try:
            status_tracker.mark_failed(
                url=url,
                failure_step=failure_step,
                title=(article_data or {}).get("title", ""),
            )
        except Exception as tracking_error:
            logger.warning(f"Could not record failure status for {url}: {tracking_error}")

        raise


def main():
    """
    Main entry point for dev flow.

    Validates the configuration, then runs process_article_dev for every
    active URL in the list below, in order. Processing stops at the first
    failing URL.

    Exits the process with status 1 when configuration validation fails or
    when processing an article raises.
    """
    # Configuration is validated in its own try block so that a ValueError
    # raised later, while processing an article, is not misreported as a
    # configuration error.
    try:
        logger.info("Validating configuration...")
        validate_config()
        logger.info("✓ Configuration valid")
    except ValueError as e:
        logger.error(f"Configuration error: {e}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Fatal error: {e}")
        sys.exit(1)

    # Process article
    # url = "https://www.healthline.com/nutrition/12-omega-3-rich-foods"
    # url = "https://www.healthline.com/health/10-gut-foods"

    # Every entry (active or commented out) ends with a comma so that
    # un-commenting neighbours never triggers implicit string concatenation.
    urls = [
        # "https://www.healthline.com/health/the-benefits-of-biotin",
        # "https://www.healthline.com/nutrition/12-omega-3-rich-foods",
        # "https://www.healthline.com/nutrition/how-much-collagen-per-day",
        # "https://www.healthline.com/health/beauty-skin-care/supplements-for-better-skin",
        # "https://www.healthline.com/nutrition/ashwagandha",
        # "https://www.healthline.com/health/10-gut-foods",

        # "https://indianexpress.com/article/lifestyle/food-wine/icmr-protein-supplements-powder-health-kidney-bones-9318508/",
        # "https://www.verywellhealth.com/keto-diet-long-term-risks-5197991",
        # "https://www.verywellhealth.com/best-time-of-day-to-eat-your-fiber-11945630",
        # "https://www.webmd.com/vitamins/ai/ingredientmono-464/gamma-aminobutyric-acid-gaba#overview",
        # "https://www.webmd.com/vitamins-supplements/activated-charcoal",
        # "https://www.webmd.com/vitamins-supplements/5-htp",
        # "https://www.webmd.com/vitamins/ai/ingredientmono-1101/holy-basil",
        # "https://www.webmd.com/vitamins/ai/ingredientmono-1062/hyaluronic-acid",
        # "https://www.webmd.com/vitamins/ai/ingredientmono-875/l-arginine",
        # "https://www.webmd.com/vitamins-supplements/evening-primrose-oil",
        # "https://www.webmd.com/vitamins/ai/ingredientmono-1242/moringa",
        # "https://www.webmd.com/vitamins-supplements/xylitol",

        # "https://www.healthline.com/nutrition/best-testosterone-booster-supplements#our-picks",

        # "https://www.verywellhealth.com/keto-diet-long-term-risks-5197991",

        # Batch2
        # "https://www.healthline.com/nutrition/best-testosterone-booster-supplements#our-picks",
        # "https://www.medicalnewstoday.com/articles/how-much-protein-do-you-need-to-build-muscle#How-much-protein-do-you-need",

        # to completed
        # "https://www.medicalnewstoday.com/articles/is-it-better-to-eat-several-small-meals-or-fewer-larger-ones",
        # "https://www.medicalnewstoday.com/articles/not-all-plant-based-diets-are-the-same-junk-veggie-food-and-its-impact-on-health",
        # "https://www.medicalnewstoday.com/articles/intermittent-fasting-is-it-all-its-cracked-up-to-be",

        # "https://www.who.int/health-topics/nutrition#tab=tab_1",
        # failing
        # "https://publichealth.jhu.edu/2025/the-evidence-behind-seed-oils-health-effects",
        # "https://nutritionsource.hsph.harvard.edu/what-should-you-eat/vegetables-and-fruits",
        # "https://health.clevelandclinic.org/plant-based-milk-options",
        # "https://www.nhs.uk/live-well/eat-well/food-guidelines-and-food-labels/the-eatwell-guide/",
        # "https://www.futureoffood.ox.ac.uk/improving-diet-and-nutrition",
        # "https://medlineplus.gov/ency/article/002465.htm",
        # "https://www.bbc.com/future/article/20260424-diet-why-enjoying-your-food-is-key-to-weight-loss",
        # "https://www.fda.gov/food/nutrition-food-labeling-and-critical-foods/fdas-nutrition-initiatives",
        # "https://www.medindia.net/news/healthwatch/can-peanut-butter-keep-you-stronger-as-you-age-223205-1.htm",
        # "https://www.healthline.com/health/type-2-diabetes/basal-insulin-types-benefits-dosage-side-effects",
        "https://www.webmd.com/vitamins/ai/ingredientmono-707/java-tea#overview",
    ]

    # Run the dev flow for each URL in turn; the first failure aborts the
    # run with a non-zero exit status.
    try:
        for url in urls:
            process_article_dev(url)
    except Exception as e:
        logger.error(f"Fatal error: {e}")
        sys.exit(1)


# Run the dev flow only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()