"""
Reddit Financial News Scraper
Scrapes financial, trading, quant, and geopolitical news from Reddit
No authentication required - uses public RSS feeds
"""

import logging
import re
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Optional

import feedparser

logger = logging.getLogger(__name__)


class RedditFinanceMonitor:
    """
    Reddit financial news aggregator using RSS feeds
    No authentication required - public RSS feeds only
    """

    # Premium financial subreddits
    SUBREDDITS = {
        # Financial & Markets
        'wallstreetbets': {
            'url': 'https://www.reddit.com/r/wallstreetbets/top/.rss?t=day',
            'weight': 1.6,
            'specialization': ['markets'],
            'category': 'markets'
        },
        'stocks': {
            'url': 'https://www.reddit.com/r/stocks/top/.rss?t=day',
            'weight': 1.7,
            'specialization': ['markets'],
            'category': 'markets'
        },
        'investing': {
            'url': 'https://www.reddit.com/r/investing/top/.rss?t=day',
            'weight': 1.8,
            'specialization': ['markets', 'macro'],
            'category': 'markets'
        },
        'stockmarket': {
            'url': 'https://www.reddit.com/r/StockMarket/top/.rss?t=day',
            'weight': 1.6,
            'specialization': ['markets'],
            'category': 'markets'
        },
        'options': {
            'url': 'https://www.reddit.com/r/options/top/.rss?t=day',
            'weight': 1.5,
            'specialization': ['markets'],
            'category': 'markets'
        },
        'daytrading': {
            'url': 'https://www.reddit.com/r/Daytrading/top/.rss?t=day',
            'weight': 1.5,
            'specialization': ['markets'],
            'category': 'markets'
        },
        'securityanalysis': {
            'url': 'https://www.reddit.com/r/SecurityAnalysis/top/.rss?t=day',
            'weight': 1.7,
            'specialization': ['markets'],
            'category': 'markets'
        },

        # Economics & Macro
        'economics': {
            'url': 'https://www.reddit.com/r/Economics/top/.rss?t=day',
            'weight': 1.8,
            'specialization': ['macro'],
            'category': 'macro'
        },
        'economy': {
            'url': 'https://www.reddit.com/r/economy/top/.rss?t=day',
            'weight': 1.6,
            'specialization': ['macro'],
            'category': 'macro'
        },

        # Quantitative Finance
        'algotrading': {
            'url': 'https://www.reddit.com/r/algotrading/top/.rss?t=day',
            'weight': 1.7,
            'specialization': ['markets'],
            'category': 'markets'
        },
        'quantfinance': {
            'url': 'https://www.reddit.com/r/quant/top/.rss?t=day',
            'weight': 1.7,
            'specialization': ['markets'],
            'category': 'markets'
        },

        # Geopolitics
        'geopolitics': {
            'url': 'https://www.reddit.com/r/geopolitics/top/.rss?t=day',
            'weight': 1.8,
            'specialization': ['geopolitical'],
            'category': 'geopolitical'
        },
        'worldnews': {
            'url': 'https://www.reddit.com/r/worldnews/top/.rss?t=day',
            'weight': 1.7,
            'specialization': ['geopolitical'],
            'category': 'geopolitical'
        },
        'neutralpolitics': {
            'url': 'https://www.reddit.com/r/NeutralPolitics/top/.rss?t=day',
            'weight': 1.6,
            'specialization': ['geopolitical'],
            'category': 'geopolitical'
        },
    }

    # Keyword detection for additional categorization
    MACRO_KEYWORDS = [
        'Fed', 'ECB', 'BoE', 'BoJ', 'FOMC', 'Powell', 'Lagarde',
        'interest rate', 'inflation', 'CPI', 'PPI', 'GDP',
        'unemployment', 'jobs report', 'NFP', 'central bank',
        'recession', 'QE', 'quantitative easing', 'monetary policy'
    ]

    MARKETS_KEYWORDS = [
        'stock', 'equity', 'bond', 'commodity', 'oil', 'gold',
        'earnings', 'revenue', 'profit', 'IPO', 'merger',
        'acquisition', 'trading', 'options', 'futures', 'forex'
    ]

    GEOPOLITICAL_KEYWORDS = [
        'war', 'conflict', 'sanction', 'trade', 'tariff',
        'election', 'China', 'Russia', 'Ukraine', 'Taiwan',
        'Middle East', 'Iran', 'Israel', 'NATO', 'UN'
    ]

    def __init__(self):
        """
        Create a Reddit monitor.

        The monitor is configured entirely through class-level constants,
        so no instance attributes are set here.
        """

    def _categorize_post(self, title: str, subreddit_info: Dict) -> str:
        """
        Categorize a post from its title, falling back to the subreddit default.

        Keyword lists are checked in priority order (macro, geopolitical,
        markets); the first list with a hit decides the category. Keywords
        are matched case-insensitively as whole words, with an optional
        plural suffix, so that short keywords such as 'war', 'UN' or 'Fed'
        do not fire inside unrelated words ('software', 'fund', 'FedEx').

        Args:
            title: Post title to inspect
            subreddit_info: Subreddit config; its 'category' entry is the
                fallback when no keyword matches ('markets' if absent)

        Returns:
            One of the keyword categories or the subreddit default category
        """
        def mentions(keywords) -> bool:
            # One alternation per keyword list; \b on both sides enforces
            # whole-word matches, (?:s|es)? tolerates simple plurals.
            alternatives = '|'.join(re.escape(keyword) for keyword in keywords)
            pattern = rf'\b(?:{alternatives})(?:s|es)?\b'
            return re.search(pattern, title, re.IGNORECASE) is not None

        # Order matters: earlier entries override later ones.
        keyword_rules = (
            ('macro', self.MACRO_KEYWORDS),
            ('geopolitical', self.GEOPOLITICAL_KEYWORDS),
            ('markets', self.MARKETS_KEYWORDS),
        )
        for category, keywords in keyword_rules:
            if mentions(keywords):
                return category

        return subreddit_info.get('category', 'markets')

    def _detect_sentiment(self, title: str) -> str:
        """
        Simple keyword-based sentiment detection for a post title.

        Counts how many distinct positive and negative lexicon words occur
        in the title and returns the side with more hits. Words are matched
        case-insensitively as whole words (optionally pluralised), so 'up'
        does not match 'update' or 'supply', 'gain' does not match 'again',
        and 'bullish' is not additionally counted as 'bull'.

        Args:
            title: Post title to inspect

        Returns:
            'positive', 'negative', or 'neutral' (tie or no hits)
        """
        positive_words = ('bullish', 'bull', 'surge', 'gain', 'up', 'rally', 'boom', 'profit', 'growth')
        negative_words = ('bearish', 'bear', 'crash', 'loss', 'down', 'fall', 'decline', 'recession', 'crisis')

        def count_hits(words) -> int:
            # Each lexicon word contributes at most 1, however often it occurs.
            return sum(
                1 for word in words
                if re.search(rf'\b{re.escape(word)}(?:s|es)?\b', title, re.IGNORECASE)
            )

        positive_count = count_hits(positive_words)
        negative_count = count_hits(negative_words)

        if positive_count > negative_count:
            return 'positive'
        if negative_count > positive_count:
            return 'negative'
        return 'neutral'

    def _calculate_impact(self, score: int, num_comments: int, subreddit_weight: float) -> str:
        """
        Bucket a post into 'high', 'medium' or 'low' impact.

        Engagement is a 70/30 blend of score and comment count, scaled by
        the subreddit weight. A weighted value strictly above 500 is
        'high', strictly above 100 is 'medium', anything else is 'low'.
        """
        weighted = ((score * 0.7) + (num_comments * 0.3)) * subreddit_weight

        # Thresholds are listed from the highest bucket down; first hit wins.
        for threshold, label in ((500, 'high'), (100, 'medium')):
            if weighted > threshold:
                return label
        return 'low'

    def scrape_reddit_news(self, max_posts: int = 100, hours: int = 12) -> List[Dict]:
        """
        Scrape Reddit posts from the feeds listed in SUBREDDITS.

        For every subreddit the RSS feed is parsed with feedparser and at
        most its first 20 entries are considered. Entries older than
        ``hours`` are dropped and posts whose title prefix (first 100
        characters) was already seen are skipped. The surviving posts are
        sorted by score multiplied by the subreddit weight, highest first.

        All time comparisons are done in naive UTC, because feedparser
        reports ``published_parsed`` in UTC.

        Args:
            max_posts: Maximum number of posts to return
            hours: Only include posts from the last N hours (default: 12)

        Returns:
            List of post dicts with the keys 'title', 'summary', 'url',
            'source', 'timestamp', 'category', 'sentiment', 'impact',
            'is_breaking', 'engagement' and 'platform'
        """
        all_posts = []
        seen_titles = set()
        # Naive UTC "now", comparable with datetimes built from published_parsed.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        cutoff_time = now_utc - timedelta(hours=hours)

        logger.info(f"Scraping Reddit posts from last {hours} hours...")

        for subreddit_name, subreddit_info in self.SUBREDDITS.items():
            try:
                logger.info(f"Fetching r/{subreddit_name}...")

                # Parse RSS feed
                feed = feedparser.parse(subreddit_info['url'])

                # feedparser flags fetch/parse problems via `bozo` instead of
                # raising, so an unreadable feed would otherwise look empty.
                if getattr(feed, 'bozo', False) and not feed.entries:
                    logger.warning(
                        "Could not read feed for r/%s: %s",
                        subreddit_name,
                        getattr(feed, 'bozo_exception', 'unknown error'),
                    )

                kept = 0
                for entry in feed.entries[:20]:  # Get top 20 per subreddit
                    try:
                        post = self._parse_entry(
                            entry, subreddit_name, subreddit_info, now_utc, cutoff_time
                        )
                        if post is None:
                            continue

                        # Deduplicate on the title prefix itself (no hash collisions)
                        dedup_key = post['title'][:100]
                        if dedup_key in seen_titles:
                            continue
                        seen_titles.add(dedup_key)

                        all_posts.append(post)
                        kept += 1

                    except Exception as e:
                        # Best effort: one malformed entry must not abort the feed
                        logger.error(f"Error processing entry from r/{subreddit_name}: {e}")
                        continue

                logger.info(f"Fetched {kept} posts from r/{subreddit_name}")

            except Exception as e:
                logger.error(f"Error fetching r/{subreddit_name}: {e}")
                continue

        # Sort by engagement score (weighted by source weight)
        weight_by_source = {
            f"r/{name}": info.get('weight', 1.0)
            for name, info in self.SUBREDDITS.items()
        }
        all_posts.sort(
            key=lambda post: post['engagement']['score'] * weight_by_source.get(post['source'], 1.0),
            reverse=True,
        )

        logger.info(f"Total Reddit posts scraped: {len(all_posts)}")

        return all_posts[:max_posts]

    def _parse_entry(self, entry, subreddit_name: str, subreddit_info: Dict,
                     now_utc: datetime, cutoff_time: datetime) -> Optional[Dict]:
        """
        Convert one feed entry into a post dict, or None if it is too old.

        Args:
            entry: A feedparser entry (needs 'title' and 'link'; may carry
                'published_parsed' and 'content')
            subreddit_name: Key of the subreddit in SUBREDDITS
            subreddit_info: Config dict of that subreddit (reads 'weight'
                and, via _categorize_post, 'category')
            now_utc: Current time as naive UTC datetime
            cutoff_time: Entries published before this naive UTC time are dropped

        Returns:
            The post dict, or None when the entry is older than cutoff_time
        """
        # A missing or unparseable date is treated as "just published".
        published = getattr(entry, 'published_parsed', None)
        pub_date = datetime(*published[:6]) if published else now_utc

        if pub_date < cutoff_time:
            return None

        title = entry.title.strip()

        # NOTE(review): score and comment counts are only found if the feed
        # content contains text like "123 points" / "45 comments" - confirm
        # Reddit's RSS actually provides it; otherwise both stay 0.
        score = 0
        num_comments = 0
        content = getattr(entry, 'content', None)
        content_text = content[0].value if content else ''
        score_match = re.search(r'(\d+)\s+points?', content_text)
        if score_match:
            score = int(score_match.group(1))
        comment_match = re.search(r'(\d+)\s+comments?', content_text)
        if comment_match:
            num_comments = int(comment_match.group(1))

        # Breaking news: high score within the last 3 hours (10800 seconds)
        age_seconds = (now_utc - pub_date).total_seconds()
        is_breaking = age_seconds < 10800 and score > 1000

        return {
            'title': title,
            'summary': title,  # Reddit posts don't have separate summaries
            'url': entry.link,
            'source': f"r/{subreddit_name}",
            'timestamp': pub_date,
            'category': self._categorize_post(title, subreddit_info),
            'sentiment': self._detect_sentiment(title),
            'impact': self._calculate_impact(score, num_comments, subreddit_info['weight']),
            'is_breaking': is_breaking,
            'engagement': {
                'score': score,
                'comments': num_comments
            },
            'platform': 'reddit'
        }

    def get_statistics(self) -> Dict:
        """
        Return an all-zero statistics structure.

        Per the original note, statistics are managed by NewsCacheManager;
        this method only exists for backward compatibility and always
        reports zero for every counter.
        """
        zeroed_categories = dict.fromkeys(('macro', 'markets', 'geopolitical'), 0)
        stats = dict(total=0, high_impact=0, breaking=0)
        stats['by_category'] = zeroed_categories
        return stats