# File: cx_ai_agent_v1/services/client_researcher.py
# Uploaded by: muzakkirhussain011
# Commit message: Add application files (text files only)
# Commit: 8bab08d
"""
Client Company Researcher

Researches the CLIENT company to understand its offerings, value propositions,
and target customers. This information is used to personalize emails sent TO
prospects.
"""
import logging
import re
from typing import Dict, List, Optional, TYPE_CHECKING
from urllib.parse import urlparse

from services.web_search import get_search_service
from services.web_scraper import WebScraperService

if TYPE_CHECKING:
    from mcp.registry import MCPRegistry
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class ClientResearcher:
    """
    Researches CLIENT companies to understand their offerings.

    Supports MCP (Model Context Protocol) for a unified search interface and
    falls back to the direct web search service when no registry is supplied.

    Attributes:
        search: Search client; ``await search.search(query, max_results=n)``
            is expected to yield dict-like results read via the ``'url'``,
            ``'title'`` and ``'body'`` keys.
        scraper: ``WebScraperService`` used to pull extra details from the
            client's website.
    """

    # Sentence-initial words that are capitalised but are not company names
    # (compared case-insensitively).
    _NON_NAME_WORDS = frozenset({'the', 'this', 'that', 'some'})

    # Characters stripped from both ends of a token before it is considered a
    # candidate competitor name, so "Globex," and "Globex" are the same name.
    _TOKEN_PUNCTUATION = ".,;:!?()[]{}\"'"

    # Compiled once at class creation instead of on every search result.
    _FOUNDED_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'founded in (\d{4})',
            r'established in (\d{4})',
            r'started in (\d{4})',
        )
    )
    _SIZE_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'(\d+[,\d]*)\s+employees',
            r'team of (\d+[,\d]*)',
            r'(\d+[,\d]*)\s+people',
            r'workforce of (\d+[,\d]*)',
        )
    )
    _FUNDING_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'\$(\d+[,\d]*\.?\d*)\s*(million|billion)\s+funding',
            r'raised \$(\d+[,\d]*\.?\d*)\s*(million|billion)',
            r'valued at \$(\d+[,\d]*\.?\d*)\s*(million|billion)',
        )
    )

    def __init__(self, mcp_registry: Optional['MCPRegistry'] = None):
        """
        Initialize client researcher

        Args:
            mcp_registry: Optional MCP registry for unified search (recommended).
                If None (or otherwise falsy), falls back to the direct web
                search service.
        """
        if mcp_registry:
            # Use MCP search client
            self.search = mcp_registry.get_search_client()
            logger.info("ClientResearcher initialized with MCP search client")
        else:
            # Fallback to direct search service (legacy)
            self.search = get_search_service()
            logger.warning("ClientResearcher initialized without MCP (consider using MCP)")
        self.scraper = WebScraperService()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    async def _search(self, query: str, max_results: int) -> List[Dict]:
        """Run one search and normalise a falsy response (e.g. ``None``) to ``[]``."""
        results = await self.search.search(query, max_results=max_results)
        return results or []

    @staticmethod
    def _field(result: Dict, key: str) -> str:
        """Return ``result[key]``, or ``''`` when the key is missing or falsy."""
        return result.get(key) or ''

    @staticmethod
    def _dedupe(items: List[str], limit: int) -> List[str]:
        """Drop duplicates keeping first-seen order, then keep at most ``limit``.

        ``dict.fromkeys`` is used instead of ``set`` so that the items kept
        (and their order) are the same on every run.
        """
        return list(dict.fromkeys(items))[:limit]

    @staticmethod
    def _domain_of(url: str) -> str:
        """Return the URL's network location without a leading ``www.``."""
        host = urlparse(url).netloc
        # Only a leading "www." is removed; str.replace would also eat a
        # "www." that happens to occur inside the host name.
        return host[4:] if host.startswith('www.') else host

    @staticmethod
    def _first_match(patterns, text: str):
        """Return the first regex match among ``patterns`` in ``text``, else ``None``."""
        for pattern in patterns:
            match = pattern.search(text)
            if match:
                return match
        return None

    @classmethod
    def _record_facts(cls, profile: Dict, results: List[Dict], label: str,
                      limit: int = 300) -> None:
        """Append ``"<label>: <body[:limit]>"`` to ``raw_facts`` for each non-empty body."""
        for result in results:
            body = cls._field(result, 'body')
            if body:
                profile['raw_facts'].append(f"{label}: {body[:limit]}")

    @classmethod
    def _matching_sentences(cls, results: List[Dict], *, gate, keywords=(),
                            max_sentences: int, min_len: int = 0,
                            max_len: Optional[int] = None,
                            gate_on_title: bool = False) -> List[str]:
        """Collect keyword-matching sentences from the leading part of each result body.

        Args:
            results: Search results (dicts with ``'body'`` and optionally ``'title'``).
            gate: Substrings; a result is only inspected when at least one
                occurs in its lower-cased body (or title + body).
            keywords: Substrings; a sentence is kept when at least one occurs
                in it (lower-cased). Empty means every sentence qualifies.
            max_sentences: Only the first ``max_sentences`` pieces of
                ``body.split('.')`` are considered.
            min_len: Exclusive lower bound on the stripped sentence length.
            max_len: Exclusive upper bound on the stripped sentence length,
                or ``None`` for no upper bound.
            gate_on_title: When true the gate is tested against
                ``"<title> <body>"`` instead of the body alone.

        Returns:
            Stripped sentences in encounter order (duplicates included).
        """
        collected = []
        for result in results:
            body = cls._field(result, 'body')
            gate_text = f"{cls._field(result, 'title')} {body}" if gate_on_title else body
            gate_text = gate_text.lower()
            if not any(word in gate_text for word in gate):
                continue
            for piece in body.split('.')[:max_sentences]:
                sentence = piece.strip()
                lowered = sentence.lower()
                if keywords and not any(word in lowered for word in keywords):
                    continue
                if len(sentence) <= min_len:
                    continue
                if max_len is not None and len(sentence) >= max_len:
                    continue
                collected.append(sentence)
        return collected

    @classmethod
    def _extract_competitors(cls, results: List[Dict], client_name: str) -> List[str]:
        """Guess competitor names: capitalised words (>3 chars) in comparison sentences.

        This is a simple heuristic. Tokens are stripped of surrounding
        punctuation and a trailing ``'s``; the client's own name (and each
        word of it) plus a few sentence-initial words are excluded.
        """
        excluded = set(cls._NON_NAME_WORDS)
        excluded.add(client_name.casefold())
        excluded.update(part.casefold() for part in client_name.split())

        sentences = cls._matching_sentences(
            results,
            gate=('competitor', 'alternative', 'vs', 'versus', 'similar', 'compared to'),
            keywords=('competitor', 'alternative', 'vs', 'compared'),
            max_sentences=2,
            max_len=150,
        )
        names = []
        for sentence in sentences:
            for word in sentence.split():
                token = word.strip(cls._TOKEN_PUNCTUATION)
                if token.endswith("'s"):
                    token = token[:-2]
                if len(token) > 3 and token[0].isupper() and token.casefold() not in excluded:
                    names.append(token)
        return names

    @classmethod
    def _extract_background(cls, profile: Dict, results: List[Dict]) -> None:
        """Fill ``founded``, ``company_size`` and ``funding`` from the first result that matches each."""
        for result in results:
            body = cls._field(result, 'body')

            if not profile['founded']:
                match = cls._first_match(cls._FOUNDED_PATTERNS, body)
                if match:
                    profile['founded'] = match.group(1)

            if not profile['company_size']:
                match = cls._first_match(cls._SIZE_PATTERNS, body)
                if match:
                    profile['company_size'] = match.group(1) + ' employees'

            if not profile['funding']:
                match = cls._first_match(cls._FUNDING_PATTERNS, body)
                if match:
                    profile['funding'] = f"${match.group(1)} {match.group(2)}"

    @staticmethod
    def _print_summary(profile: Dict) -> None:
        """Print the end-of-run research summary to stdout."""
        print("[CLIENT RESEARCH] === COMPREHENSIVE RESEARCH COMPLETE ===")
        print(f"[CLIENT RESEARCH] Name: {profile['name']}")
        print(f"[CLIENT RESEARCH] Website: {profile['website']}")
        # 'industry' always exists (possibly empty), so fall back on falsiness
        # rather than on a missing key.
        print(f"[CLIENT RESEARCH] Industry: {profile['industry'] or 'Unknown'}")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] COMPANY BACKGROUND:")
        print(f"[CLIENT RESEARCH] - Founded: {profile['founded'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH] - Company Size: {profile['company_size'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH] - Funding: {profile['funding'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH] - Market Position: {profile['market_position'][:60] or 'Not found'}...")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] PRODUCT/SERVICE INFO:")
        print(f"[CLIENT RESEARCH] - Offerings: {len(profile['offerings'])} extracted")
        print(f"[CLIENT RESEARCH] - Key Features: {len(profile['key_features'])} extracted")
        print(f"[CLIENT RESEARCH] - Integrations: {len(profile['integrations'])} found")
        print(f"[CLIENT RESEARCH] - Pricing Model: {profile['pricing_model'][:60] or 'Not found'}...")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] MARKETING & POSITIONING:")
        print(f"[CLIENT RESEARCH] - Value Props: {len(profile['value_propositions'])} extracted")
        print(f"[CLIENT RESEARCH] - Target Customers: {len(profile['target_customers'])} extracted")
        print(f"[CLIENT RESEARCH] - Use Cases: {len(profile['use_cases'])} extracted")
        print(f"[CLIENT RESEARCH] - Differentiators: {len(profile['differentiators'])} extracted")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] COMPETITIVE & MARKET:")
        print(f"[CLIENT RESEARCH] - Competitors: {len(profile['competitors'])} identified")
        print(f"[CLIENT RESEARCH] - Awards: {len(profile['awards'])} found")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] CREDIBILITY & PROOF:")
        print(f"[CLIENT RESEARCH] - Customer Testimonials: {len(profile['customer_testimonials'])} found")
        print(f"[CLIENT RESEARCH] - Recent News: {len(profile['recent_news'])} items")
        print("[CLIENT RESEARCH]")
        print("[CLIENT RESEARCH] GROUNDING DATA:")
        print(f"[CLIENT RESEARCH] - Raw Facts Collected: {len(profile['raw_facts'])} facts")
        print("[CLIENT RESEARCH] - Total Extraction Depth: 15 comprehensive steps")
        print("[CLIENT RESEARCH] ================================================\n")

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def research_client(self, client_name: str) -> Dict:
        """
        ENHANCED: Deep research on CLIENT company with extensive data extraction

        Runs fourteen keyword searches (sequentially) followed by one website
        scrape, and extracts sentences from the result snippets with simple
        keyword heuristics. Progress is printed to stdout.

        Args:
            client_name: Name of the client company; used verbatim in every
                search query.

        Returns:
            {
                'name': str,                     # client_name, or the scraped name if one is found
                'website': str,                  # URL of the first "official website" hit
                'domain': str,                   # website host without a leading "www."
                'description': str,              # snippet of the first hit, else scraped description
                'offerings': [str],              # What they sell/offer
                'value_propositions': [str],     # Key benefits
                'target_customers': [str],       # Who they serve
                'industry': str,                 # not populated by this method (stays '')
                'use_cases': [str],              # Common use cases
                'differentiators': [str],        # not populated by this method (stays [])
                'key_features': [str],           # Main features
                'pricing_model': str,            # How they charge
                'competitors': [str],            # Main competitors (heuristic)
                'founded': str,                  # When founded
                'company_size': str,             # Employee count
                'funding': str,                  # Funding info
                'integrations': [str],           # Integrations and partnerships
                'awards': [str],                 # Awards and recognition
                'customer_testimonials': [str],  # Customer success stories
                'recent_news': [str],            # Recent company news
                'market_position': str,          # Market position and leadership
                'raw_facts': [str]               # All extracted facts for grounding
            }

        Raises:
            Whatever the search client raises; only the website scrape in the
            final step is wrapped in a best-effort ``try``/``except``.
        """
        logger.info(f"ClientResearcher: ENHANCED research for '{client_name}'")
        print(f"\n[CLIENT RESEARCH] Starting ENHANCED research for '{client_name}'")

        profile = {
            'name': client_name,
            'website': '',
            'domain': '',
            'description': '',
            'offerings': [],
            'value_propositions': [],
            'target_customers': [],
            'industry': '',
            'use_cases': [],
            'differentiators': [],
            'key_features': [],
            'pricing_model': '',
            'competitors': [],
            'founded': '',
            'company_size': '',
            'funding': '',
            'integrations': [],            # Integrations and partnerships
            'awards': [],                  # Awards and recognition
            'customer_testimonials': [],   # Customer success stories
            'recent_news': [],             # Recent company news
            'market_position': '',         # Market position and leadership
            'raw_facts': [],               # Store all extracted facts for grounding
        }

        # Step 1: Find official website
        print("[CLIENT RESEARCH] Finding official website...")
        website_results = await self._search(f"{client_name} official website", 3)
        if website_results:
            top_hit = website_results[0]
            profile['website'] = self._field(top_hit, 'url')
            profile['description'] = self._field(top_hit, 'body')
            if profile['website']:
                profile['domain'] = self._domain_of(profile['website'])
            print(f"[CLIENT RESEARCH] Website: {profile['website']}")

        # Step 2: Understand what they offer
        print("[CLIENT RESEARCH] Researching offerings...")
        results = await self._search(
            f"{client_name} products services what they offer features", 5)
        self._record_facts(profile, results, "Offerings info")
        profile['offerings'] = self._dedupe(
            self._matching_sentences(
                results,
                gate=('offer', 'provides', 'platform', 'solution', 'service'),
                keywords=('offer', 'provides', 'platform', 'solution'),
                max_sentences=3,
            ),
            5,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['offerings'])} offerings")

        # Step 3: Find value propositions
        print("[CLIENT RESEARCH] Researching value propositions...")
        results = await self._search(
            f"{client_name} benefits advantages why choose how it helps", 5)
        self._record_facts(profile, results, "Value props info")
        profile['value_propositions'] = self._dedupe(
            self._matching_sentences(
                results,
                gate=('help', 'benefit', 'improve', 'reduce', 'increase', 'save'),
                keywords=('help', 'benefit', 'improve', 'reduce', 'increase'),
                max_sentences=3,
                max_len=200,  # Not too long
            ),
            5,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['value_propositions'])} value props")

        # Step 4: Identify target customers
        print("[CLIENT RESEARCH] Identifying target customers...")
        results = await self._search(
            f"{client_name} target customers who uses ideal for best for", 5)
        profile['target_customers'] = self._dedupe(
            self._matching_sentences(
                results,
                gate=('for', 'ideal', 'customers', 'businesses', 'companies'),
                keywords=('for', 'ideal', 'designed'),
                max_sentences=2,
                max_len=150,
            ),
            3,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['target_customers'])} target customer types")

        # Step 5: Find use cases
        print("[CLIENT RESEARCH] Finding use cases...")
        results = await self._search(f"{client_name} use cases examples how to use", 3)
        profile['use_cases'] = self._dedupe(
            self._matching_sentences(
                results,
                gate=('use case', 'example'),
                max_sentences=2,
                min_len=20,
                max_len=150,
            ),
            3,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['use_cases'])} use cases")

        # Step 6: ENHANCED - Extract key features
        print("[CLIENT RESEARCH] Extracting key features...")
        results = await self._search(
            f"{client_name} features capabilities what it does main functions", 5)
        self._record_facts(profile, results, "Feature info")
        profile['key_features'] = self._dedupe(
            self._matching_sentences(
                results,
                gate=('feature', 'capability', 'function', 'tool', 'includes'),
                keywords=('feature', 'includes', 'provides', 'offers', 'enables'),
                max_sentences=3,
                min_len=20,
                max_len=180,
                gate_on_title=True,  # this step also looks at the result title
            ),
            8,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['key_features'])} key features")

        # Step 7: ENHANCED - Research pricing model
        print("[CLIENT RESEARCH] Researching pricing model...")
        results = await self._search(
            f"{client_name} pricing cost plans free trial subscription", 3)
        self._record_facts(profile, results, "Pricing info", limit=250)
        pricing_sentences = self._matching_sentences(
            results,
            gate=('pricing', 'price', 'plan', 'subscription', 'free', 'per month', 'per user'),
            keywords=('price', 'plan', 'subscription', 'free', 'cost', '$'),
            max_sentences=2,
            max_len=180,
        )
        if pricing_sentences:
            # Only the first qualifying sentence is kept.
            profile['pricing_model'] = pricing_sentences[0]
        print(f"[CLIENT RESEARCH] Pricing model: {profile['pricing_model'][:50] or 'Not found'}...")

        # Step 8: ENHANCED - Identify competitors
        print("[CLIENT RESEARCH] Identifying competitors...")
        results = await self._search(
            f"{client_name} competitors alternatives vs comparison similar to", 4)
        self._record_facts(profile, results, "Competitive info", limit=250)
        profile['competitors'] = self._dedupe(
            self._extract_competitors(results, client_name), 5)
        print(f"[CLIENT RESEARCH] Found {len(profile['competitors'])} competitors")

        # Step 9: ENHANCED - Company background (founded, size, funding)
        print("[CLIENT RESEARCH] Researching company background...")
        results = await self._search(
            f"{client_name} founded company size employees funding valuation about", 4)
        self._record_facts(profile, results, "Company background")
        self._extract_background(profile, results)
        print(f"[CLIENT RESEARCH] Founded: {profile['founded'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH] Company Size: {profile['company_size'] or 'Unknown'}")
        print(f"[CLIENT RESEARCH] Funding: {profile['funding'] or 'Unknown'}")

        # Step 10: ENHANCED - Integrations and Partnerships
        print("[CLIENT RESEARCH] Researching integrations and partnerships...")
        results = await self._search(
            f"{client_name} integrations partners API connects with works with", 4)
        self._record_facts(profile, results, "Integrations info")
        profile['integrations'] = self._dedupe(
            self._matching_sentences(
                results,
                gate=('integrat', 'partner', 'connect', 'api', 'works with'),
                keywords=('integrat', 'partner', 'connect', 'api'),
                max_sentences=2,
                min_len=20,
                max_len=150,
            ),
            6,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['integrations'])} integrations/partnerships")

        # Step 11: ENHANCED - Awards and Recognition
        print("[CLIENT RESEARCH] Finding awards and recognition...")
        results = await self._search(
            f"{client_name} awards recognition best rated named leader", 3)
        self._record_facts(profile, results, "Awards info")
        profile['awards'] = self._dedupe(
            self._matching_sentences(
                results,
                gate=('award', 'recognition', 'winner', 'leader', 'best', 'rated'),
                keywords=('award', 'winner', 'leader', 'best', 'rated'),
                max_sentences=2,
                min_len=20,
                max_len=180,
            ),
            5,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['awards'])} awards/recognition")

        # Step 12: ENHANCED - Customer Testimonials/Success Stories
        print("[CLIENT RESEARCH] Finding customer testimonials...")
        results = await self._search(
            f"{client_name} customer success stories testimonials case study reviews", 3)
        self._record_facts(profile, results, "Customer success info")
        profile['customer_testimonials'] = self._dedupe(
            self._matching_sentences(
                results,
                gate=('customer', 'success', 'testimonial', 'case study', 'helped'),
                keywords=('helped', 'success', 'improved', 'increased', 'reduced'),
                max_sentences=2,
                min_len=30,
                max_len=200,
            ),
            4,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['customer_testimonials'])} customer testimonials")

        # Step 13: ENHANCED - Recent News and Updates
        print("[CLIENT RESEARCH] Finding recent news...")
        results = await self._search(
            f"{client_name} news recent updates announcement launch 2024 2025", 4)
        self._record_facts(profile, results, "Recent news")
        profile['recent_news'] = self._dedupe(
            self._matching_sentences(
                results,
                gate=('announce', 'launch', 'new', 'update', 'release'),
                keywords=('announce', 'launch', 'new', 'release'),
                max_sentences=2,
                min_len=20,
                max_len=180,
            ),
            5,
        )
        print(f"[CLIENT RESEARCH] Found {len(profile['recent_news'])} recent news items")

        # Step 14: ENHANCED - Market Position
        print("[CLIENT RESEARCH] Analyzing market position...")
        results = await self._search(
            f"{client_name} market leader industry position market share rank", 3)
        self._record_facts(profile, results, "Market position")
        market_sentences = self._matching_sentences(
            results,
            gate=('leader', 'market', 'position', 'share', 'rank', 'top'),
            keywords=('leader', 'market', 'position', 'top', 'leading'),
            max_sentences=2,
            max_len=180,
        )
        if market_sentences:
            # Only the first qualifying sentence is kept.
            profile['market_position'] = market_sentences[0]
        print(f"[CLIENT RESEARCH] Market position: {profile['market_position'][:60] or 'Not found'}...")

        # Step 15: Scrape website for additional details
        if profile['website']:
            print("[CLIENT RESEARCH] Scraping website for details...")
            try:
                company_info = await self.scraper.extract_company_info(profile['website'])
                if company_info:
                    if not profile['description'] and company_info.get('description'):
                        profile['description'] = company_info['description']
                    # Update name if we got a better one
                    if company_info.get('name'):
                        profile['name'] = company_info['name']
            except Exception as e:
                # Best effort: a failed scrape must not discard the search-based profile.
                logger.error(f"Error scraping client website: {e}")

        self._print_summary(profile)
        return profile
# Legacy singleton (deprecated - use MCP instead); created lazily on first use.
_client_researcher = None


def get_client_researcher(mcp_registry: Optional['MCPRegistry'] = None) -> ClientResearcher:
    """
    Return a ClientResearcher, either MCP-backed or the shared legacy one.

    Args:
        mcp_registry: Optional MCP registry (recommended). When a truthy
            registry is passed, a brand-new researcher bound to it is built
            on every call. Otherwise the module-level legacy singleton
            (deprecated) is created on first use and reused afterwards.

    Returns:
        ClientResearcher instance
    """
    global _client_researcher

    if not mcp_registry:
        # Legacy path: build the shared instance once, then keep handing it out.
        if _client_researcher is None:
            _client_researcher = ClientResearcher()
        return _client_researcher

    # Recommended path: a fresh, registry-bound researcher per call.
    return ClientResearcher(mcp_registry=mcp_registry)