"""
Client Company Researcher

Researches the CLIENT company to understand their offerings, value props,
and target customers.  This information is used to personalize emails TO
prospects.
"""
import logging
import re
import string
from typing import Dict, Iterable, List, Optional, Sequence, TYPE_CHECKING
from urllib.parse import urlparse

from services.web_search import get_search_service
from services.web_scraper import WebScraperService

if TYPE_CHECKING:
    from mcp.registry import MCPRegistry

logger = logging.getLogger(__name__)


def _compile_all(patterns: Iterable[str]) -> tuple:
    """Compile each regex source case-insensitively, preserving order."""
    return tuple(re.compile(pattern, re.IGNORECASE) for pattern in patterns)


# Patterns are tried in order; the first one that matches wins.
_FOUNDED_PATTERNS = _compile_all([
    r'founded in (\d{4})',
    r'established in (\d{4})',
    r'started in (\d{4})',
])
_SIZE_PATTERNS = _compile_all([
    r'(\d+[,\d]*)\s+employees',
    r'team of (\d+[,\d]*)',
    r'(\d+[,\d]*)\s+people',
    r'workforce of (\d+[,\d]*)',
])
_FUNDING_PATTERNS = _compile_all([
    r'\$(\d+[,\d]*\.?\d*)\s*(million|billion)\s+funding',
    r'raised \$(\d+[,\d]*\.?\d*)\s*(million|billion)',
    r'valued at \$(\d+[,\d]*\.?\d*)\s*(million|billion)',
])

# Capitalised words that are never treated as competitor names.
_COMPETITOR_STOPWORDS = frozenset({'The', 'This', 'That', 'Some'})


def _contains_any(text: str, keywords: Iterable[str]) -> bool:
    """Return True if any keyword occurs (case-insensitively) in *text*."""
    lowered = text.lower()
    return any(keyword in lowered for keyword in keywords)


def _dedupe(items: Iterable[str], limit: int) -> List[str]:
    """Drop duplicates while keeping first-seen order, then keep *limit* items.

    ``dict.fromkeys`` is used instead of ``set`` so that the items surviving
    the cut-off (and their order) are the same on every run.
    """
    return list(dict.fromkeys(items))[:limit]


def _domain_from_url(url: str) -> str:
    """Return the network location of *url* without a leading ``www.``."""
    netloc = urlparse(url).netloc
    # Only a leading "www." is a prefix; removing it elsewhere would corrupt
    # hosts that merely contain that substring.
    if netloc.startswith('www.'):
        netloc = netloc[len('www.'):]
    return netloc


def _first_match(patterns: Iterable['re.Pattern'], text: str) -> Optional['re.Match']:
    """Return the match of the first pattern that matches *text*, else None."""
    for pattern in patterns:
        match = pattern.search(text)
        if match:
            return match
    return None


def _pick_sentences(
    body: str,
    keywords: Sequence[str],
    *,
    max_sentences: int,
    min_len: Optional[int] = None,
    max_len: Optional[int] = None,
) -> List[str]:
    """Select sentences from the start of *body* that pass the filters.

    Args:
        body: Text to split on ``.``.
        keywords: A sentence must contain at least one of these
            (case-insensitive).  An empty sequence disables the filter.
        max_sentences: Only the first this-many sentences are inspected.
        min_len: If given, the raw sentence must be strictly longer.
        max_len: If given, the raw sentence must be strictly shorter.

    Returns:
        The accepted sentences, whitespace-stripped, in document order.
    """
    picked = []
    for sentence in body.split('.')[:max_sentences]:
        if keywords and not _contains_any(sentence, keywords):
            continue
        # Length bounds apply to the raw (unstripped) sentence.
        length = len(sentence)
        if min_len is not None and length <= min_len:
            continue
        if max_len is not None and length >= max_len:
            continue
        cleaned = sentence.strip()
        if cleaned:
            picked.append(cleaned)
    return picked


def _preview(text: str, width: int) -> str:
    """Return the first *width* characters of *text*, or 'Not found' if empty."""
    return text[:width] if text else 'Not found'


def _print_summary(profile: Dict) -> None:
    """Print the end-of-run console report for a finished *profile*."""
    print("[CLIENT RESEARCH] === COMPREHENSIVE RESEARCH COMPLETE ===")
    print(f"[CLIENT RESEARCH] Name: {profile['name']}")
    print(f"[CLIENT RESEARCH] Website: {profile['website']}")
    # The key always exists (possibly empty), so fall back on emptiness.
    print(f"[CLIENT RESEARCH] Industry: {profile.get('industry') or 'Unknown'}")
    print("[CLIENT RESEARCH]")
    print("[CLIENT RESEARCH] COMPANY BACKGROUND:")
    print(f"[CLIENT RESEARCH] - Founded: {profile['founded'] or 'Unknown'}")
    print(f"[CLIENT RESEARCH] - Company Size: {profile['company_size'] or 'Unknown'}")
    print(f"[CLIENT RESEARCH] - Funding: {profile['funding'] or 'Unknown'}")
    print(f"[CLIENT RESEARCH] - Market Position: {_preview(profile['market_position'], 60)}...")
    print("[CLIENT RESEARCH]")
    print("[CLIENT RESEARCH] PRODUCT/SERVICE INFO:")
    print(f"[CLIENT RESEARCH] - Offerings: {len(profile['offerings'])} extracted")
    print(f"[CLIENT RESEARCH] - Key Features: {len(profile['key_features'])} extracted")
    print(f"[CLIENT RESEARCH] - Integrations: {len(profile['integrations'])} found")
    print(f"[CLIENT RESEARCH] - Pricing Model: {_preview(profile['pricing_model'], 60)}...")
    print("[CLIENT RESEARCH]")
    print("[CLIENT RESEARCH] MARKETING & POSITIONING:")
    print(f"[CLIENT RESEARCH] - Value Props: {len(profile['value_propositions'])} extracted")
    print(f"[CLIENT RESEARCH] - Target Customers: {len(profile['target_customers'])} extracted")
    print(f"[CLIENT RESEARCH] - Use Cases: {len(profile['use_cases'])} extracted")
    print(f"[CLIENT RESEARCH] - Differentiators: {len(profile['differentiators'])} extracted")
    print("[CLIENT RESEARCH]")
    print("[CLIENT RESEARCH] COMPETITIVE & MARKET:")
    print(f"[CLIENT RESEARCH] - Competitors: {len(profile['competitors'])} identified")
    print(f"[CLIENT RESEARCH] - Awards: {len(profile['awards'])} found")
    print("[CLIENT RESEARCH]")
    print("[CLIENT RESEARCH] CREDIBILITY & PROOF:")
    print(f"[CLIENT RESEARCH] - Customer Testimonials: {len(profile['customer_testimonials'])} found")
    print(f"[CLIENT RESEARCH] - Recent News: {len(profile['recent_news'])} items")
    print("[CLIENT RESEARCH]")
    print("[CLIENT RESEARCH] GROUNDING DATA:")
    print(f"[CLIENT RESEARCH] - Raw Facts Collected: {len(profile['raw_facts'])} facts")
    print("[CLIENT RESEARCH] - Total Extraction Depth: 15 comprehensive steps")
    print("[CLIENT RESEARCH] ================================================\n")


class ClientResearcher:
    """
    Researches CLIENT companies to understand their offerings

    Now supports MCP (Model Context Protocol) for unified search interface
    """

    def __init__(self, mcp_registry: Optional['MCPRegistry'] = None):
        """
        Initialize client researcher

        Args:
            mcp_registry: Optional MCP registry for unified search (recommended)
                If None, falls back to direct web search service
        """
        if mcp_registry:
            # Use MCP search client
            self.search = mcp_registry.get_search_client()
            logger.info("ClientResearcher initialized with MCP search client")
        else:
            # Fallback to direct search service (legacy)
            self.search = get_search_service()
            logger.warning("ClientResearcher initialized without MCP (consider using MCP)")

        self.scraper = WebScraperService()

    async def _search(self, query: str, max_results: int) -> List[Dict]:
        """Run one search and normalise a missing/None result set to ``[]``."""
        results = await self.search.search(query, max_results=max_results)
        return results or []

    async def _collect_sentences(
        self,
        profile: Dict,
        field: str,
        query: str,
        *,
        max_results: int,
        triggers: Sequence[str],
        keywords: Sequence[str],
        max_sentences: int,
        limit: int,
        fact_label: Optional[str] = None,
        fact_chars: int = 300,
        min_len: Optional[int] = None,
        max_len: Optional[int] = None,
        trigger_on_title: bool = False,
    ) -> None:
        """Search for *query* and fill the list ``profile[field]`` with sentences.

        For every result with a non-empty body, a raw fact
        ``"<fact_label>: <body[:fact_chars]>"`` is recorded when *fact_label*
        is given.  Sentences are only extracted from results whose body (or
        title plus body when *trigger_on_title* is set) contains one of
        *triggers*.  The field is finally de-duplicated in first-seen order
        and truncated to *limit* entries.
        """
        for result in await self._search(query, max_results):
            body = result.get('body') or ''
            if body and fact_label:
                profile['raw_facts'].append(f"{fact_label}: {body[:fact_chars]}")

            if trigger_on_title:
                trigger_text = f"{result.get('title') or ''} {body}"
            else:
                trigger_text = body

            if _contains_any(trigger_text, triggers):
                profile[field].extend(
                    _pick_sentences(
                        body,
                        keywords,
                        max_sentences=max_sentences,
                        min_len=min_len,
                        max_len=max_len,
                    )
                )

        profile[field] = _dedupe(profile[field], limit)

    async def _find_first_sentence(
        self,
        profile: Dict,
        query: str,
        *,
        max_results: int,
        fact_label: str,
        fact_chars: int,
        triggers: Sequence[str],
        keywords: Sequence[str],
        max_len: int,
    ) -> str:
        """Return the first qualifying sentence across the results, or ``''``.

        Raw facts are recorded for each inspected result; results after the
        one that yields a sentence are not inspected.
        """
        for result in await self._search(query, max_results):
            body = result.get('body') or ''
            if body:
                profile['raw_facts'].append(f"{fact_label}: {body[:fact_chars]}")

            if _contains_any(body, triggers):
                matches = _pick_sentences(body, keywords, max_sentences=2, max_len=max_len)
                if matches:
                    return matches[0]
        return ''

    async def _find_competitors(self, profile: Dict, client_name: str) -> None:
        """Fill ``profile['competitors']`` using a capitalised-word heuristic.

        Words are taken from comparison-style sentences; a word counts as a
        candidate name when, after surrounding punctuation is removed, it is
        longer than three characters, starts with an uppercase letter, and is
        neither the client name nor a common sentence opener.
        """
        excluded = _COMPETITOR_STOPWORDS | {client_name}
        query = f"{client_name} competitors alternatives vs comparison similar to"

        for result in await self._search(query, 4):
            body = result.get('body') or ''
            if body:
                profile['raw_facts'].append(f"Competitive info: {body[:250]}")

            if not _contains_any(
                body, ('competitor', 'alternative', 'vs', 'versus', 'similar', 'compared to')
            ):
                continue

            sentences = _pick_sentences(
                body,
                ('competitor', 'alternative', 'vs', 'compared'),
                max_sentences=2,
                max_len=150,
            )
            for sentence in sentences:
                for word in sentence.split():
                    # Strip punctuation so "Acme," is recognised as the client
                    # and "HubSpot," / "HubSpot" collapse into one entry.
                    candidate = word.strip(string.punctuation)
                    if len(candidate) > 3 and candidate[0].isupper() and candidate not in excluded:
                        profile['competitors'].append(candidate)

        profile['competitors'] = _dedupe(profile['competitors'], 5)

    async def _research_background(self, profile: Dict, client_name: str) -> None:
        """Fill ``founded``, ``company_size`` and ``funding`` via regex matches.

        Each field keeps the first value found across the search results.
        """
        query = f"{client_name} founded company size employees funding valuation about"

        for result in await self._search(query, 4):
            body = result.get('body') or ''
            if not body:
                continue
            profile['raw_facts'].append(f"Company background: {body[:300]}")

            if not profile['founded']:
                match = _first_match(_FOUNDED_PATTERNS, body)
                if match:
                    profile['founded'] = match.group(1)

            if not profile['company_size']:
                match = _first_match(_SIZE_PATTERNS, body)
                if match:
                    profile['company_size'] = match.group(1) + ' employees'

            if not profile['funding']:
                match = _first_match(_FUNDING_PATTERNS, body)
                if match:
                    amount, unit = match.group(1), match.group(2)
                    profile['funding'] = f"${amount} {unit}"

    async def _scrape_website(self, profile: Dict) -> None:
        """Best-effort enrichment of name/description from the client website."""
        try:
            company_info = await self.scraper.extract_company_info(profile['website'])
            if company_info:
                if not profile['description'] and company_info.get('description'):
                    profile['description'] = company_info['description']

                # Update name if we got a better one
                if company_info.get('name'):
                    profile['name'] = company_info['name']
        except Exception as e:
            # Scraping is optional: log and keep the search-derived profile.
            logger.error(f"Error scraping client website: {e}")

    async def research_client(self, client_name: str) -> Dict:
        """
        ENHANCED: Deep research on CLIENT company with extensive data extraction

        Runs a fixed sequence of web searches (plus one website scrape) and
        extracts facts from the result snippets with keyword heuristics.
        Progress is printed to stdout.  Exceptions raised by the search
        client propagate; scraper errors are logged and ignored.

        Args:
            client_name: Company name used to build every search query.

        Returns:
            {
                'name': str,
                'website': str,
                'domain': str,
                'description': str,
                'offerings': [str],              # What they sell/offer
                'value_propositions': [str],     # Key benefits
                'target_customers': [str],       # Who they serve
                'industry': str,                 # Not populated by this method
                'use_cases': [str],              # Common use cases
                'differentiators': [str],        # Not populated by this method
                'key_features': [str],           # Main features
                'pricing_model': str,            # How they charge
                'competitors': [str],            # Main competitors
                'founded': str,                  # When founded
                'company_size': str,             # Employee count
                'funding': str,                  # Funding info
                'integrations': [str],           # Integrations and partnerships
                'awards': [str],                 # Awards and recognition
                'customer_testimonials': [str],  # Customer success stories
                'recent_news': [str],            # Recent company news
                'market_position': str,          # Market position and leadership
                'raw_facts': [str]               # All extracted facts for grounding
            }
        """
        logger.info("ClientResearcher: ENHANCED research for '%s'", client_name)
        print(f"\n[CLIENT RESEARCH] Starting ENHANCED research for '{client_name}'")

        profile = {
            'name': client_name,
            'website': '',
            'domain': '',
            'description': '',
            'offerings': [],
            'value_propositions': [],
            'target_customers': [],
            'industry': '',
            'use_cases': [],
            'differentiators': [],
            'key_features': [],
            'pricing_model': '',
            'competitors': [],
            'founded': '',
            'company_size': '',
            'funding': '',
            'integrations': [],
            'awards': [],
            'customer_testimonials': [],
            'recent_news': [],
            'market_position': '',
            'raw_facts': [],  # Store all extracted facts for grounding
        }

        # Step 1: Find official website
        print("[CLIENT RESEARCH] Finding official website...")
        website_results = await self._search(f"{client_name} official website", 3)
        if website_results:
            top_hit = website_results[0]
            profile['website'] = top_hit.get('url') or ''
            profile['description'] = top_hit.get('body') or ''
            if profile['website']:
                profile['domain'] = _domain_from_url(profile['website'])
            print(f"[CLIENT RESEARCH] Website: {profile['website']}")

        # Step 2: Understand what they offer
        print("[CLIENT RESEARCH] Researching offerings...")
        await self._collect_sentences(
            profile,
            'offerings',
            f"{client_name} products services what they offer features",
            max_results=5,
            fact_label="Offerings info",
            triggers=('offer', 'provides', 'platform', 'solution', 'service'),
            keywords=('offer', 'provides', 'platform', 'solution'),
            max_sentences=3,
            limit=5,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['offerings'])} offerings")

        # Step 3: Find value propositions
        print("[CLIENT RESEARCH] Researching value propositions...")
        await self._collect_sentences(
            profile,
            'value_propositions',
            f"{client_name} benefits advantages why choose how it helps",
            max_results=5,
            fact_label="Value props info",
            triggers=('help', 'benefit', 'improve', 'reduce', 'increase', 'save'),
            keywords=('help', 'benefit', 'improve', 'reduce', 'increase'),
            max_sentences=3,
            max_len=200,
            limit=5,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['value_propositions'])} value props")

        # Step 4: Identify target customers (no raw facts recorded here)
        print("[CLIENT RESEARCH] Identifying target customers...")
        await self._collect_sentences(
            profile,
            'target_customers',
            f"{client_name} target customers who uses ideal for best for",
            max_results=5,
            triggers=('for', 'ideal', 'customers', 'businesses', 'companies'),
            keywords=('for', 'ideal', 'designed'),
            max_sentences=2,
            max_len=150,
            limit=3,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['target_customers'])} target customer types")

        # Step 5: Find use cases (no raw facts, no per-sentence keyword filter)
        print("[CLIENT RESEARCH] Finding use cases...")
        await self._collect_sentences(
            profile,
            'use_cases',
            f"{client_name} use cases examples how to use",
            max_results=3,
            triggers=('use case', 'example'),
            keywords=(),
            max_sentences=2,
            min_len=20,
            max_len=150,
            limit=3,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['use_cases'])} use cases")

        # Step 6: Extract key features (title also counts towards the trigger)
        print("[CLIENT RESEARCH] Extracting key features...")
        await self._collect_sentences(
            profile,
            'key_features',
            f"{client_name} features capabilities what it does main functions",
            max_results=5,
            fact_label="Feature info",
            triggers=('feature', 'capability', 'function', 'tool', 'includes'),
            keywords=('feature', 'includes', 'provides', 'offers', 'enables'),
            max_sentences=3,
            min_len=20,
            max_len=180,
            limit=8,
            trigger_on_title=True,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['key_features'])} key features")

        # Step 7: Research pricing model
        print("[CLIENT RESEARCH] Researching pricing model...")
        profile['pricing_model'] = await self._find_first_sentence(
            profile,
            f"{client_name} pricing cost plans free trial subscription",
            max_results=3,
            fact_label="Pricing info",
            fact_chars=250,
            triggers=('pricing', 'price', 'plan', 'subscription', 'free', 'per month', 'per user'),
            keywords=('price', 'plan', 'subscription', 'free', 'cost', '$'),
            max_len=180,
        )
        print(f"[CLIENT RESEARCH] Pricing model: {_preview(profile['pricing_model'], 50)}...")

        # Step 8: Identify competitors
        print("[CLIENT RESEARCH] Identifying competitors...")
        await self._find_competitors(profile, client_name)
        print(f"[CLIENT RESEARCH] Found {len(profile['competitors'])} competitors")

        # Step 9: Company background (founded, size, funding)
        print("[CLIENT RESEARCH] Researching company background...")
        await self._research_background(profile, client_name)
        print(f"[CLIENT RESEARCH] Founded: {profile['founded'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH] Company Size: {profile['company_size'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH] Funding: {profile['funding'] or 'Unknown'}")

        # Step 10: Integrations and Partnerships
        print("[CLIENT RESEARCH] Researching integrations and partnerships...")
        await self._collect_sentences(
            profile,
            'integrations',
            f"{client_name} integrations partners API connects with works with",
            max_results=4,
            fact_label="Integrations info",
            triggers=('integrat', 'partner', 'connect', 'api', 'works with'),
            keywords=('integrat', 'partner', 'connect', 'api'),
            max_sentences=2,
            min_len=20,
            max_len=150,
            limit=6,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['integrations'])} integrations/partnerships")

        # Step 11: Awards and Recognition
        print("[CLIENT RESEARCH] Finding awards and recognition...")
        await self._collect_sentences(
            profile,
            'awards',
            f"{client_name} awards recognition best rated named leader",
            max_results=3,
            fact_label="Awards info",
            triggers=('award', 'recognition', 'winner', 'leader', 'best', 'rated'),
            keywords=('award', 'winner', 'leader', 'best', 'rated'),
            max_sentences=2,
            min_len=20,
            max_len=180,
            limit=5,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['awards'])} awards/recognition")

        # Step 12: Customer Testimonials/Success Stories
        print("[CLIENT RESEARCH] Finding customer testimonials...")
        await self._collect_sentences(
            profile,
            'customer_testimonials',
            f"{client_name} customer success stories testimonials case study reviews",
            max_results=3,
            fact_label="Customer success info",
            triggers=('customer', 'success', 'testimonial', 'case study', 'helped'),
            keywords=('helped', 'success', 'improved', 'increased', 'reduced'),
            max_sentences=2,
            min_len=30,
            max_len=200,
            limit=4,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['customer_testimonials'])} customer testimonials")

        # Step 13: Recent News and Updates
        print("[CLIENT RESEARCH] Finding recent news...")
        await self._collect_sentences(
            profile,
            'recent_news',
            f"{client_name} news recent updates announcement launch 2024 2025",
            max_results=4,
            fact_label="Recent news",
            triggers=('announce', 'launch', 'new', 'update', 'release'),
            keywords=('announce', 'launch', 'new', 'release'),
            max_sentences=2,
            min_len=20,
            max_len=180,
            limit=5,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['recent_news'])} recent news items")

        # Step 14: Market Position
        print("[CLIENT RESEARCH] Analyzing market position...")
        profile['market_position'] = await self._find_first_sentence(
            profile,
            f"{client_name} market leader industry position market share rank",
            max_results=3,
            fact_label="Market position",
            fact_chars=300,
            triggers=('leader', 'market', 'position', 'share', 'rank', 'top'),
            keywords=('leader', 'market', 'position', 'top', 'leading'),
            max_len=180,
        )
        print(f"[CLIENT RESEARCH] Market position: {_preview(profile['market_position'], 60)}...")

        # Step 15: Scrape website for additional details
        if profile['website']:
            print("[CLIENT RESEARCH] Scraping website for details...")
            await self._scrape_website(profile)

        _print_summary(profile)
        return profile


# Legacy singleton (deprecated - use MCP instead)
_client_researcher = None


def get_client_researcher(mcp_registry: Optional['MCPRegistry'] = None) -> ClientResearcher:
    """
    Get client researcher instance

    Args:
        mcp_registry: Optional MCP registry (recommended). If provided, creates new instance.
            If None, returns legacy singleton (deprecated)

    Returns:
        ClientResearcher instance
    """
    if mcp_registry:
        # Create new instance with MCP (recommended)
        return ClientResearcher(mcp_registry=mcp_registry)

    # Legacy singleton fallback (deprecated): built lazily on first use
    global _client_researcher
    if _client_researcher is None:
        _client_researcher = ClientResearcher()
    return _client_researcher