"""
Client Company Researcher
Researches the CLIENT company to understand their offerings, value props, and target customers
This information is used to personalize emails TO prospects
"""
import logging
import re
from typing import Dict, List, Optional, Sequence, TYPE_CHECKING
from urllib.parse import urlparse

from services.web_search import get_search_service
from services.web_scraper import WebScraperService

if TYPE_CHECKING:
    from mcp.registry import MCPRegistry

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


class ClientResearcher:
    """
    Researches CLIENT companies to understand their offerings

    Runs a fixed series of web searches about the client, extracts short
    sentences from the result snippets with keyword heuristics, and finally
    asks the web scraper for details from the client's own website.

    Now supports MCP (Model Context Protocol) for unified search interface
    """

    # Company-background patterns. The wanted value is capture group 1; the
    # funding patterns additionally capture the unit (million/billion) in group 2.
    _FOUNDED_PATTERNS = (
        re.compile(r'founded in (\d{4})', re.IGNORECASE),
        re.compile(r'established in (\d{4})', re.IGNORECASE),
        re.compile(r'started in (\d{4})', re.IGNORECASE),
    )
    _SIZE_PATTERNS = (
        re.compile(r'(\d+[,\d]*)\s+employees', re.IGNORECASE),
        re.compile(r'team of (\d+[,\d]*)', re.IGNORECASE),
        re.compile(r'(\d+[,\d]*)\s+people', re.IGNORECASE),
        re.compile(r'workforce of (\d+[,\d]*)', re.IGNORECASE),
    )
    _FUNDING_PATTERNS = (
        re.compile(r'\$(\d+[,\d]*\.?\d*)\s*(million|billion)\s+funding', re.IGNORECASE),
        re.compile(r'raised \$(\d+[,\d]*\.?\d*)\s*(million|billion)', re.IGNORECASE),
        re.compile(r'valued at \$(\d+[,\d]*\.?\d*)\s*(million|billion)', re.IGNORECASE),
    )

    # Capitalised words that are never competitor names (compared lower-cased).
    _COMPETITOR_STOPWORDS = frozenset({'the', 'this', 'that', 'some'})
    # Punctuation trimmed from both ends of a word before it is judged as a name.
    _TOKEN_PUNCTUATION = '.,;:!?()[]{}<>"\''

    def __init__(self, mcp_registry: Optional['MCPRegistry'] = None):
        """
        Initialize client researcher

        Args:
            mcp_registry: Optional MCP registry for unified search (recommended)
                         If None, falls back to direct web search service
        """
        # Compare against None (the documented "absent" value) rather than
        # relying on the registry object's truthiness.
        if mcp_registry is not None:
            # Use MCP search client
            self.search = mcp_registry.get_search_client()
            logger.info("ClientResearcher initialized with MCP search client")
        else:
            # Fallback to direct search service (legacy)
            self.search = get_search_service()
            logger.warning("ClientResearcher initialized without MCP (consider using MCP)")

        self.scraper = WebScraperService()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    async def _run_search(self, query: str, max_results: int) -> List[Dict]:
        """Run one search; return its results as a list, or [] when the backend returns nothing."""
        results = await self.search.search(query, max_results=max_results)
        return list(results) if results else []

    @staticmethod
    def _text(result: Dict, key: str) -> str:
        """Return result[key] when it is a string, else '' (covers a missing key or a None value)."""
        value = result.get(key)
        return value if isinstance(value, str) else ''

    @staticmethod
    def _dedupe(items: List[str], limit: int) -> List[str]:
        """Drop duplicates while keeping first-seen order, then keep at most `limit` items."""
        return list(dict.fromkeys(items))[:limit]

    @staticmethod
    def _extract_domain(url: str) -> str:
        """Return the host part of `url` with a single leading 'www.' removed."""
        netloc = urlparse(url).netloc
        # Strip only the prefix; a plain replace() would also mangle hosts
        # that merely contain 'www.' further in (e.g. 'mywww.io').
        if netloc.lower().startswith('www.'):
            netloc = netloc[4:]
        return netloc

    @classmethod
    def _record_raw_facts(cls, profile: Dict, label: str, results: List[Dict],
                          max_chars: int = 300) -> None:
        """Append '<label>: <body snippet>' to profile['raw_facts'] for every result that has a body."""
        for result in results:
            body = cls._text(result, 'body')
            if body:
                profile['raw_facts'].append(f"{label}: {body[:max_chars]}")

    @classmethod
    def _matching_sentences(cls, results: List[Dict], gate: Sequence[str],
                            keywords: Sequence[str], max_sentences: int,
                            min_len: Optional[int] = None,
                            max_len: Optional[int] = None,
                            gate_on_title: bool = False) -> List[str]:
        """
        Collect sentences from result bodies that pass the keyword heuristics.

        All keyword tests are case-insensitive *substring* tests (so 'for'
        also matches 'platform'); this mirrors the heuristic's original intent
        of being permissive.

        Args:
            results: Search results (dicts with optional 'title'/'body').
            gate: A result is only inspected if any of these occurs in its
                body (or in "title body" when gate_on_title is True).
            keywords: A sentence is kept only if it contains any of these.
                An empty sequence disables the per-sentence keyword test.
            max_sentences: Only the first N '.'-separated pieces of each body
                are considered.
            min_len: Exclusive lower bound on the un-stripped sentence length.
            max_len: Exclusive upper bound on the un-stripped sentence length.
            gate_on_title: Include the result title in the gate test.

        Returns:
            Stripped sentences in result order (duplicates not removed).
        """
        matches: List[str] = []
        for result in results:
            body = cls._text(result, 'body')
            gate_text = f"{cls._text(result, 'title')} {body}" if gate_on_title else body
            gate_text = gate_text.lower()
            if not any(kw in gate_text for kw in gate):
                continue

            for sentence in body.split('.')[:max_sentences]:
                lowered = sentence.lower()
                if keywords and not any(kw in lowered for kw in keywords):
                    continue
                if min_len is not None and len(sentence) <= min_len:
                    continue
                if max_len is not None and len(sentence) >= max_len:
                    continue
                text = sentence.strip()
                if text:  # whitespace-only fragments carry no information
                    matches.append(text)
        return matches

    async def _search_sentences(self, profile: Dict, query: str, max_results: int,
                                gate: Sequence[str], keywords: Sequence[str],
                                max_sentences: int,
                                raw_label: Optional[str] = None,
                                raw_chars: int = 300,
                                min_len: Optional[int] = None,
                                max_len: Optional[int] = None,
                                gate_on_title: bool = False) -> List[str]:
        """
        Run one search step: search, optionally record raw facts, extract sentences.

        When `raw_label` is given, every result body is recorded in
        profile['raw_facts'] (truncated to `raw_chars`) regardless of whether
        a sentence from it is later kept. The remaining arguments are passed
        to `_matching_sentences`.
        """
        results = await self._run_search(query, max_results)
        if raw_label:
            self._record_raw_facts(profile, raw_label, results, raw_chars)
        return self._matching_sentences(
            results, gate, keywords, max_sentences,
            min_len=min_len, max_len=max_len, gate_on_title=gate_on_title,
        )

    @classmethod
    def _competitor_names(cls, sentences: List[str], client_name: str) -> List[str]:
        """
        Pull likely company names out of competitor sentences (simple heuristic).

        A word counts as a name when, after trimming surrounding punctuation
        and a possessive "'s", it is longer than 3 characters, starts with an
        upper-case letter, and is neither a stop word nor part of the
        client's own name.
        """
        excluded = set(cls._COMPETITOR_STOPWORDS)
        excluded.add(client_name.lower())
        excluded.update(
            part.strip(cls._TOKEN_PUNCTUATION).lower() for part in client_name.split()
        )

        names: List[str] = []
        for sentence in sentences:
            for word in sentence.split():
                token = word.strip(cls._TOKEN_PUNCTUATION)
                for suffix in ("'s", "\u2019s"):
                    if token.endswith(suffix):
                        token = token[:-len(suffix)]
                        break
                if len(token) > 3 and token[0].isupper() and token.lower() not in excluded:
                    names.append(token)
        return names

    @staticmethod
    def _first_match(patterns, text: str) -> Optional['re.Match']:
        """Return the match of the first pattern (in order) that matches `text`, else None."""
        for pattern in patterns:
            match = pattern.search(text)
            if match:
                return match
        return None

    @classmethod
    def _extract_background(cls, profile: Dict, results: List[Dict]) -> None:
        """Fill 'founded', 'company_size' and 'funding' from the first result body that yields each."""
        for result in results:
            body = cls._text(result, 'body')

            if not profile['founded']:
                match = cls._first_match(cls._FOUNDED_PATTERNS, body)
                if match:
                    profile['founded'] = match.group(1)

            if not profile['company_size']:
                match = cls._first_match(cls._SIZE_PATTERNS, body)
                if match:
                    profile['company_size'] = match.group(1) + ' employees'

            if not profile['funding']:
                match = cls._first_match(cls._FUNDING_PATTERNS, body)
                if match:
                    profile['funding'] = f"${match.group(1)} {match.group(2)}"

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def research_client(self, client_name: str) -> Dict:
        """
        ENHANCED: Deep research on CLIENT company with extensive data extraction

        Performs 14 searches through ``self.search`` followed by one scrape of
        the discovered website through ``self.scraper``. Progress is printed
        to stdout. Exceptions raised by the search client propagate to the
        caller; scraper errors are logged and swallowed.

        Args:
            client_name: Company name used to build every search query.

        Returns:
            {
                'name': str,  # client_name, or the scraper's name if it returns one
                'website': str,  # URL of the top "official website" hit
                'domain': str,  # Host of 'website' without a leading 'www.'
                'description': str,
                'offerings': [str],  # What they sell/offer
                'value_propositions': [str],  # Key benefits
                'target_customers': [str],  # Who they serve
                'industry': str,  # Never populated by this method (always '')
                'use_cases': [str],  # Common use cases
                'differentiators': [str],  # Never populated by this method (always [])
                'key_features': [str],  # Main features
                'pricing_model': str,  # How they charge
                'competitors': [str],  # Main competitors
                'founded': str,  # When founded
                'company_size': str,  # Employee count
                'funding': str,  # Funding info
                'integrations': [str],  # Integrations and partnerships
                'awards': [str],  # Awards and recognition
                'customer_testimonials': [str],  # Customer success stories
                'recent_news': [str],  # Recent company news
                'market_position': str,  # Market position and leadership
                'raw_facts': [str]  # All extracted facts for grounding
            }
            List fields are de-duplicated in first-seen order and capped.
        """
        logger.info(f"ClientResearcher: ENHANCED research for '{client_name}'")
        print(f"\n[CLIENT RESEARCH] Starting ENHANCED research for '{client_name}'")

        profile = {
            'name': client_name,
            'website': '',
            'domain': '',
            'description': '',
            'offerings': [],
            'value_propositions': [],
            'target_customers': [],
            'industry': '',
            'use_cases': [],
            'differentiators': [],
            'key_features': [],
            'pricing_model': '',
            'competitors': [],
            'founded': '',
            'company_size': '',
            'funding': '',
            'integrations': [],       # Integrations and partnerships
            'awards': [],             # Awards and recognition
            'customer_testimonials': [],  # Customer success stories
            'recent_news': [],        # Recent company news
            'market_position': '',    # Market position and leadership
            'raw_facts': []          # Store all extracted facts for grounding
        }

        # Step 1: Find official website (the top hit is trusted as-is)
        print("[CLIENT RESEARCH] Finding official website...")
        website_results = await self._run_search(f"{client_name} official website", 3)

        if website_results:
            top_hit = website_results[0]
            profile['website'] = self._text(top_hit, 'url')
            profile['description'] = self._text(top_hit, 'body')

            if profile['website']:
                profile['domain'] = self._extract_domain(profile['website'])

            print(f"[CLIENT RESEARCH] Website: {profile['website']}")

        # Step 2: Understand what they offer
        print("[CLIENT RESEARCH] Researching offerings...")
        offerings = await self._search_sentences(
            profile,
            f"{client_name} products services what they offer features",
            max_results=5,
            raw_label="Offerings info",
            gate=('offer', 'provides', 'platform', 'solution', 'service'),
            keywords=('offer', 'provides', 'platform', 'solution'),
            max_sentences=3,
        )
        profile['offerings'] = self._dedupe(offerings, 5)
        print(f"[CLIENT RESEARCH] Found {len(profile['offerings'])} offerings")

        # Step 3: Find value propositions
        print("[CLIENT RESEARCH] Researching value propositions...")
        value_props = await self._search_sentences(
            profile,
            f"{client_name} benefits advantages why choose how it helps",
            max_results=5,
            raw_label="Value props info",
            gate=('help', 'benefit', 'improve', 'reduce', 'increase', 'save'),
            keywords=('help', 'benefit', 'improve', 'reduce', 'increase'),
            max_sentences=3,
            max_len=200,  # Not too long
        )
        profile['value_propositions'] = self._dedupe(value_props, 5)
        print(f"[CLIENT RESEARCH] Found {len(profile['value_propositions'])} value props")

        # Step 4: Identify target customers (no raw facts are kept for this step)
        print("[CLIENT RESEARCH] Identifying target customers...")
        target_customers = await self._search_sentences(
            profile,
            f"{client_name} target customers who uses ideal for best for",
            max_results=5,
            gate=('for', 'ideal', 'customers', 'businesses', 'companies'),
            keywords=('for', 'ideal', 'designed'),
            max_sentences=2,
            max_len=150,
        )
        profile['target_customers'] = self._dedupe(target_customers, 3)
        print(f"[CLIENT RESEARCH] Found {len(profile['target_customers'])} target customer types")

        # Step 5: Find use cases (no raw facts; any sentence of suitable length qualifies)
        print("[CLIENT RESEARCH] Finding use cases...")
        use_cases = await self._search_sentences(
            profile,
            f"{client_name} use cases examples how to use",
            max_results=3,
            gate=('use case', 'example'),
            keywords=(),
            max_sentences=2,
            min_len=20,
            max_len=150,
        )
        profile['use_cases'] = self._dedupe(use_cases, 3)
        print(f"[CLIENT RESEARCH] Found {len(profile['use_cases'])} use cases")

        # Step 6: ENHANCED - Extract key features (gate also looks at the title)
        print("[CLIENT RESEARCH] Extracting key features...")
        key_features = await self._search_sentences(
            profile,
            f"{client_name} features capabilities what it does main functions",
            max_results=5,
            raw_label="Feature info",
            gate=('feature', 'capability', 'function', 'tool', 'includes'),
            keywords=('feature', 'includes', 'provides', 'offers', 'enables'),
            max_sentences=3,
            min_len=20,
            max_len=180,
            gate_on_title=True,
        )
        profile['key_features'] = self._dedupe(key_features, 8)
        print(f"[CLIENT RESEARCH] Found {len(profile['key_features'])} key features")

        # Step 7: ENHANCED - Research pricing model (first matching sentence wins)
        print("[CLIENT RESEARCH] Researching pricing model...")
        pricing_sentences = await self._search_sentences(
            profile,
            f"{client_name} pricing cost plans free trial subscription",
            max_results=3,
            raw_label="Pricing info",
            raw_chars=250,
            gate=('pricing', 'price', 'plan', 'subscription', 'free', 'per month', 'per user'),
            keywords=('price', 'plan', 'subscription', 'free', 'cost', '$'),
            max_sentences=2,
            max_len=180,
        )
        profile['pricing_model'] = pricing_sentences[0] if pricing_sentences else ''
        print(f"[CLIENT RESEARCH] Pricing model: {profile['pricing_model'][:50] if profile['pricing_model'] else 'Not found'}...")

        # Step 8: ENHANCED - Identify competitors
        print("[CLIENT RESEARCH] Identifying competitors...")
        competitor_sentences = await self._search_sentences(
            profile,
            f"{client_name} competitors alternatives vs comparison similar to",
            max_results=4,
            raw_label="Competitive info",
            raw_chars=250,
            gate=('competitor', 'alternative', 'vs', 'versus', 'similar', 'compared to'),
            keywords=('competitor', 'alternative', 'vs', 'compared'),
            max_sentences=2,
            max_len=150,
        )
        profile['competitors'] = self._dedupe(
            self._competitor_names(competitor_sentences, client_name), 5
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['competitors'])} competitors")

        # Step 9: ENHANCED - Company background (founded, size, funding)
        print("[CLIENT RESEARCH] Researching company background...")
        background_results = await self._run_search(
            f"{client_name} founded company size employees funding valuation about", 4
        )
        self._record_raw_facts(profile, "Company background", background_results)
        self._extract_background(profile, background_results)

        print(f"[CLIENT RESEARCH] Founded: {profile['founded'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH] Company Size: {profile['company_size'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH] Funding: {profile['funding'] or 'Unknown'}")

        # Step 10: ENHANCED - Integrations and Partnerships
        print("[CLIENT RESEARCH] Researching integrations and partnerships...")
        integrations = await self._search_sentences(
            profile,
            f"{client_name} integrations partners API connects with works with",
            max_results=4,
            raw_label="Integrations info",
            gate=('integrat', 'partner', 'connect', 'api', 'works with'),
            keywords=('integrat', 'partner', 'connect', 'api'),
            max_sentences=2,
            min_len=20,
            max_len=150,
        )
        profile['integrations'] = self._dedupe(integrations, 6)
        print(f"[CLIENT RESEARCH] Found {len(profile['integrations'])} integrations/partnerships")

        # Step 11: ENHANCED - Awards and Recognition
        print("[CLIENT RESEARCH] Finding awards and recognition...")
        awards = await self._search_sentences(
            profile,
            f"{client_name} awards recognition best rated named leader",
            max_results=3,
            raw_label="Awards info",
            gate=('award', 'recognition', 'winner', 'leader', 'best', 'rated'),
            keywords=('award', 'winner', 'leader', 'best', 'rated'),
            max_sentences=2,
            min_len=20,
            max_len=180,
        )
        profile['awards'] = self._dedupe(awards, 5)
        print(f"[CLIENT RESEARCH] Found {len(profile['awards'])} awards/recognition")

        # Step 12: ENHANCED - Customer Testimonials/Success Stories
        print("[CLIENT RESEARCH] Finding customer testimonials...")
        testimonials = await self._search_sentences(
            profile,
            f"{client_name} customer success stories testimonials case study reviews",
            max_results=3,
            raw_label="Customer success info",
            gate=('customer', 'success', 'testimonial', 'case study', 'helped'),
            keywords=('helped', 'success', 'improved', 'increased', 'reduced'),
            max_sentences=2,
            min_len=30,
            max_len=200,
        )
        profile['customer_testimonials'] = self._dedupe(testimonials, 4)
        print(f"[CLIENT RESEARCH] Found {len(profile['customer_testimonials'])} customer testimonials")

        # Step 13: ENHANCED - Recent News and Updates
        # NOTE(review): the years in this query are hard-coded and will go stale.
        print("[CLIENT RESEARCH] Finding recent news...")
        recent_news = await self._search_sentences(
            profile,
            f"{client_name} news recent updates announcement launch 2024 2025",
            max_results=4,
            raw_label="Recent news",
            gate=('announce', 'launch', 'new', 'update', 'release'),
            keywords=('announce', 'launch', 'new', 'release'),
            max_sentences=2,
            min_len=20,
            max_len=180,
        )
        profile['recent_news'] = self._dedupe(recent_news, 5)
        print(f"[CLIENT RESEARCH] Found {len(profile['recent_news'])} recent news items")

        # Step 14: ENHANCED - Market Position (first matching sentence wins)
        print("[CLIENT RESEARCH] Analyzing market position...")
        market_sentences = await self._search_sentences(
            profile,
            f"{client_name} market leader industry position market share rank",
            max_results=3,
            raw_label="Market position",
            gate=('leader', 'market', 'position', 'share', 'rank', 'top'),
            keywords=('leader', 'market', 'position', 'top', 'leading'),
            max_sentences=2,
            max_len=180,
        )
        profile['market_position'] = market_sentences[0] if market_sentences else ''
        print(f"[CLIENT RESEARCH] Market position: {profile['market_position'][:60] if profile['market_position'] else 'Not found'}...")

        # Step 15: Scrape website for additional details
        if profile['website']:
            print("[CLIENT RESEARCH] Scraping website for details...")
            try:
                company_info = await self.scraper.extract_company_info(profile['website'])

                if company_info:
                    # The search snippet wins; the scraped description is only a fallback.
                    if not profile['description'] and company_info.get('description'):
                        profile['description'] = company_info['description']

                    # Update name if we got a better one
                    if company_info.get('name'):
                        profile['name'] = company_info['name']

            except Exception as e:
                # Scraping is best-effort: keep the search-derived profile.
                logger.error(f"Error scraping client website: {e}")

        print("[CLIENT RESEARCH] === COMPREHENSIVE RESEARCH COMPLETE ===")
        print(f"[CLIENT RESEARCH] Name: {profile['name']}")
        print(f"[CLIENT RESEARCH] Website: {profile['website']}")
        print(f"[CLIENT RESEARCH] Industry: {profile['industry'] or 'Unknown'}")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] COMPANY BACKGROUND:")
        print(f"[CLIENT RESEARCH]   - Founded: {profile['founded'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH]   - Company Size: {profile['company_size'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH]   - Funding: {profile['funding'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH]   - Market Position: {profile['market_position'][:60] if profile['market_position'] else 'Not found'}...")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] PRODUCT/SERVICE INFO:")
        print(f"[CLIENT RESEARCH]   - Offerings: {len(profile['offerings'])} extracted")
        print(f"[CLIENT RESEARCH]   - Key Features: {len(profile['key_features'])} extracted")
        print(f"[CLIENT RESEARCH]   - Integrations: {len(profile['integrations'])} found")
        print(f"[CLIENT RESEARCH]   - Pricing Model: {profile['pricing_model'][:60] if profile['pricing_model'] else 'Not found'}...")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] MARKETING & POSITIONING:")
        print(f"[CLIENT RESEARCH]   - Value Props: {len(profile['value_propositions'])} extracted")
        print(f"[CLIENT RESEARCH]   - Target Customers: {len(profile['target_customers'])} extracted")
        print(f"[CLIENT RESEARCH]   - Use Cases: {len(profile['use_cases'])} extracted")
        print(f"[CLIENT RESEARCH]   - Differentiators: {len(profile['differentiators'])} extracted")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] COMPETITIVE & MARKET:")
        print(f"[CLIENT RESEARCH]   - Competitors: {len(profile['competitors'])} identified")
        print(f"[CLIENT RESEARCH]   - Awards: {len(profile['awards'])} found")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] CREDIBILITY & PROOF:")
        print(f"[CLIENT RESEARCH]   - Customer Testimonials: {len(profile['customer_testimonials'])} found")
        print(f"[CLIENT RESEARCH]   - Recent News: {len(profile['recent_news'])} items")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] GROUNDING DATA:")
        print(f"[CLIENT RESEARCH]   - Raw Facts Collected: {len(profile['raw_facts'])} facts")
        print("[CLIENT RESEARCH]   - Total Extraction Depth: 15 comprehensive steps")
        print("[CLIENT RESEARCH] ================================================\n")

        return profile


# Legacy singleton (deprecated - use MCP instead)
# Starts as None; get_client_researcher() creates the instance on first use
# when it is called without an MCP registry.
_client_researcher: Optional[ClientResearcher] = None


def get_client_researcher(mcp_registry: Optional['MCPRegistry'] = None) -> ClientResearcher:
    """
    Get client researcher instance

    Args:
        mcp_registry: Optional MCP registry (recommended). If provided, a new
                     ClientResearcher is created on every call.
                     If None, returns the legacy module-level singleton
                     (deprecated), creating it on first use.

    Returns:
        ClientResearcher instance
    """
    global _client_researcher

    # Test for None explicitly: the contract is "None means no registry", and a
    # registry object that happens to be falsy must still be honoured.
    if mcp_registry is not None:
        # Create new instance with MCP (recommended)
        return ClientResearcher(mcp_registry=mcp_registry)

    # Legacy singleton fallback (deprecated)
    if _client_researcher is None:
        _client_researcher = ClientResearcher()
    return _client_researcher