"""
RSS feed ingestion with Google News RSS support.
"""

import logging
from datetime import datetime, timezone
from typing import Optional
from urllib.parse import quote_plus

import feedparser
from dateutil import parser as dateutil_parser

from app.settings import get_settings
from app.utils import clean_text, normalize_url

logger = logging.getLogger(__name__)


def build_google_news_rss_url(
    query: str,
    language: str = "en",
    country: str = "US",
) -> str:
    """
    Build a Google News RSS URL for a search query.

    Every interpolated value is URL-encoded so that characters such as
    ``&`` or ``=`` cannot inject extra query parameters.

    Args:
        query: Search query (e.g., "copper price")
        language: Language code (e.g., "en"); used for ``hl`` and as the
            suffix of ``ceid``
        country: Country/edition code (e.g., "US"); used for ``gl`` and as
            the prefix of ``ceid``

    Returns:
        Google News RSS URL
    """
    encoded_query = quote_plus(query)
    encoded_language = quote_plus(language)
    encoded_country = quote_plus(country)
    # Google News RSS format: ceid is "<country>:<language>" with a literal colon.
    return (
        "https://news.google.com/rss/search"
        f"?q={encoded_query}"
        f"&hl={encoded_language}"
        f"&gl={encoded_country}"
        f"&ceid={encoded_country}:{encoded_language}"
    )


def parse_rss_date(date_str: str) -> Optional[datetime]:
    """
    Convert an RSS date string into a datetime.

    Args:
        date_str: Raw date text taken from a feed entry.

    Returns:
        The parsed datetime, with UTC attached when the parsed value carries
        no tzinfo. None when the input is empty or parsing fails.
    """
    if date_str:
        try:
            parsed = dateutil_parser.parse(date_str)
            # A result without tzinfo is labelled as UTC; others pass through.
            return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
        except Exception:
            # Best effort: any parsing problem is reported as "no date".
            pass
    return None


def _parse_rss_entry(entry) -> Optional[dict]:
    """Convert one parsed feed entry into an article dict, or None if it has no title."""
    # `or` is used instead of a `.get` default so that a key that is present
    # but empty/None still falls through to the next candidate.
    title = entry.get("title") or ""
    link = entry.get("link") or ""
    published = entry.get("published") or entry.get("updated") or ""
    source = (entry.get("source") or {}).get("title") or ""

    # Google News wraps the actual source in the title
    # Format: "Article Title - Source Name"
    if not source and " - " in title:
        title, _, source = title.rpartition(" - ")

    description = entry.get("summary") or entry.get("description") or ""

    title = clean_text(title)
    description = clean_text(description)

    if not title:
        return None

    # Entries with a missing or unparseable date are stamped with the fetch time.
    published_at = parse_rss_date(published) or datetime.now(timezone.utc)

    return {
        "title": title,
        "url": normalize_url(link) if link else None,
        "published_at": published_at,
        "source": source or "Google News",
        "description": description or None,
    }


def fetch_rss_feed(
    url: str,
    max_items: int = 100
) -> list[dict]:
    """
    Fetch and parse an RSS feed.

    Failures are handled on a best-effort basis: an entry that cannot be
    converted is logged at debug level and skipped, and a failure of the
    fetch itself is logged as an error and yields an empty list.

    Args:
        url: RSS feed URL
        max_items: Maximum number of feed entries to examine; entries that
            are skipped (e.g., no title) still count toward this limit

    Returns:
        List of article dicts with keys: title, url, published_at, source, description
    """
    logger.info("Fetching RSS feed: %s", url)

    try:
        feed = feedparser.parse(url)

        # feedparser flags a problematic feed via `bozo` but may still return
        # entries, so this is only a warning.
        if feed.bozo and feed.bozo_exception:
            logger.warning("RSS feed parsing warning: %s", feed.bozo_exception)

        articles = []

        for entry in feed.entries[:max_items]:
            try:
                article = _parse_rss_entry(entry)
            except Exception as e:
                # One bad entry must not discard the rest of the feed.
                logger.debug("Error parsing RSS entry: %s", e)
                continue

            if article is not None:
                articles.append(article)

        logger.info("Fetched %d articles from RSS", len(articles))
        return articles

    except Exception as e:
        logger.error("Failed to fetch RSS feed: %s", e)
        return []


def fetch_google_news(
    query: Optional[str] = None,
    language: Optional[str] = None,
    max_items: int = 100
) -> list[dict]:
    """
    Fetch Google News RSS articles for a search query.

    Args:
        query: Search query; a falsy value falls back to ``settings.news_query``.
        language: Language code; a falsy value falls back to
            ``settings.news_language``.
        max_items: Limit passed through to ``fetch_rss_feed``.

    Returns:
        List of article dicts as produced by ``fetch_rss_feed``
    """
    config = get_settings()

    # Explicit arguments win; settings only fill in what the caller left out.
    search_terms = query if query else config.news_query
    lang_code = language if language else config.news_language

    return fetch_rss_feed(
        build_google_news_rss_url(search_terms, lang_code),
        max_items,
    )


def fetch_custom_rss_feeds(
    urls: list[str],
    max_items_per_feed: int = 50
) -> list[dict]:
    """
    Collect articles from several RSS feeds into one list.

    Feeds are fetched in the order given. A feed that raises is logged as
    an error and skipped, so the remaining feeds are still processed.

    Args:
        urls: RSS feed URLs to fetch
        max_items_per_feed: Limit passed to ``fetch_rss_feed`` for each feed

    Returns:
        Articles from all feeds, concatenated in feed order
    """
    combined = []

    for feed_url in urls:
        try:
            combined += fetch_rss_feed(feed_url, max_items_per_feed)
        except Exception as exc:
            logger.error(f"Failed to fetch RSS {feed_url}: {exc}")

    return combined