# app/services/linkedin_search.py
# Repo: LinkedinAgent — branch: Hydra-Bolt — commit: 3856f78 ("add")
import requests
import time
import re
from typing import List, Dict, Optional, Any
from urllib.parse import urlparse, parse_qs
import logging
from bs4 import BeautifulSoup
import json
from app.utils.config import Config
from app.services.cache_service import CacheService
logger = logging.getLogger(__name__)
class LinkedInSearchService:
"""Service for searching LinkedIn profiles using Google Custom Search API with fallbacks"""
def __init__(self):
    """Initialize API config, response cache, and a browser-like HTTP session."""
    # Google Custom Search credentials and endpoint.
    self.api_key = Config.GOOGLE_API_KEY
    self.cse_id = Config.GOOGLE_CSE_ID
    self.base_url = "https://www.googleapis.com/customsearch/v1"
    # Seconds to pause between consecutive searches / API requests.
    self.delay = Config.SEARCH_DELAY
    # Cache layer shared by search, per-query, and per-profile lookups.
    self.cache_service = CacheService()
    # Reusable session whose headers mimic desktop Chrome so that profile
    # scraping is less likely to be rejected outright.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    self.session = requests.Session()
    self.session.headers.update(browser_headers)
def search_linkedin_profiles(self, job_description: str, location: Optional[str] = None, max_results: int = 10) -> List[Dict]:
    """
    Search for LinkedIn profiles based on job description and location.

    Strategy: serve from cache when possible; otherwise run several Google
    Custom Search queries (rate-limited by self.delay between queries),
    deduplicate hits by profile URL, and parse the top hits into candidate
    dicts. Every degraded path — invalid credentials, zero API results, or
    an unexpected exception — falls back to generated sample profiles, and
    every outcome (real or fallback) is written to the cache.

    Args:
        job_description: Job requirements and description
        location: Preferred location (optional)
        max_results: Maximum number of profiles to return

    Returns:
        List of candidate profile dictionaries
    """
    try:
        logger.info(f"πŸ” Starting LinkedIn profile search for: {job_description[:100]}...")
        logger.info(f"πŸ“ Location: {location or 'Any'}")
        logger.info(f"πŸ“Š Max results requested: {max_results}")
        # Cache is keyed on (job_description, location, max_results).
        cached_results = self.cache_service.get_search_results(job_description, location, max_results)
        if cached_results:
            logger.info(f"🎯 Returning {len(cached_results)} cached search results")
            return cached_results
        # Without real API credentials there is no point hitting Google.
        if not self._validate_api_credentials():
            logger.warning("⚠️ Invalid or missing API credentials. Using fallback search methods.")
            fallback_results = self._fallback_search(job_description, location, max_results)
            # Cache fallback results
            self.cache_service.set_search_results(job_description, location, max_results, fallback_results)
            return fallback_results
        # Build multiple search queries for better coverage
        logger.info("πŸ“ Building search queries...")
        search_queries = self._build_multiple_search_queries(job_description, location)
        logger.info(f"βœ… Built {len(search_queries)} search queries")
        # Perform searches with different queries
        logger.info("🌐 Performing Google searches...")
        all_search_results = []
        # Deliberately over-request per query (at least 5, or half the total)
        # instead of splitting max_results across queries; the combined list
        # is deduplicated and trimmed to max_results below.
        results_per_query = max(5, max_results // 2)  # At least 5 results per query, or half of max_results
        for i, query in enumerate(search_queries, 1):
            logger.info(f"πŸ”Ž Search {i}/{len(search_queries)}: {query[:80]}...")
            results = self._perform_google_search(query, results_per_query)
            logger.info(f"πŸ“ˆ Found {len(results)} results for query {i}")
            all_search_results.extend(results)
            if i < len(search_queries):
                logger.info(f"⏳ Waiting {self.delay}s before next search...")
                time.sleep(self.delay)  # Rate limiting between queries
        logger.info(f"πŸ“Š Total search results before deduplication: {len(all_search_results)}")
        # If no results from Google API, try fallback methods
        if not all_search_results:
            logger.warning("⚠️ No results from Google API. Trying fallback search methods...")
            fallback_results = self._fallback_search(job_description, location, max_results)
            # Cache fallback results
            self.cache_service.set_search_results(job_description, location, max_results, fallback_results)
            return fallback_results
        # Remove duplicates based on profile URL
        logger.info("πŸ”„ Deduplicating search results...")
        unique_results = self._deduplicate_search_results(all_search_results)
        logger.info(f"βœ… After deduplication: {len(unique_results)} unique profiles")
        # Parse only the first max_results unique hits (parsing may scrape).
        logger.info("πŸ”§ Extracting profile data...")
        candidates = self._extract_profile_data(unique_results[:max_results])
        # Cache the results
        self.cache_service.set_search_results(job_description, location, max_results, candidates)
        logger.info(f"πŸŽ‰ Search completed! Found {len(candidates)} LinkedIn profiles using {len(search_queries)} search queries")
        return candidates
    except Exception as e:
        # Broad catch is intentional: the service must always return a list,
        # so any unexpected failure degrades to sample profiles.
        logger.error(f"❌ Error searching LinkedIn profiles: {str(e)}")
        logger.info("πŸ”„ Trying fallback search methods...")
        fallback_results = self._fallback_search(job_description, location, max_results)
        # Cache fallback results even on error
        self.cache_service.set_search_results(job_description, location, max_results, fallback_results)
        return fallback_results
def _validate_api_credentials(self) -> bool:
"""Validate that we have proper API credentials"""
if not self.api_key or self.api_key == "test_google_api_key" or self.api_key == "your_google_api_key_here":
logger.warning("⚠️ Invalid Google API key detected")
return False
if not self.cse_id or self.cse_id == "test_search_engine_id" or self.cse_id == "your_search_engine_id_here":
logger.warning("⚠️ Invalid Google CSE ID detected")
return False
return True
def _fallback_search(self, job_description: str, location: Optional[str] = None, max_results: int = 10) -> List[Dict]:
"""Fallback search method when Google API is not available"""
logger.info("πŸ”„ Using fallback search method...")
# Create sample profiles based on the job description
# This is a temporary solution until proper API credentials are configured
sample_profiles = self._generate_sample_profiles(job_description, location, max_results)
logger.info(f"πŸ“Š Generated {len(sample_profiles)} sample profiles for demonstration")
return sample_profiles
def _generate_sample_profiles(self, job_description: str, location: Optional[str] = None, max_results: int = 10) -> List[Dict]:
    """Generate sample profiles for demonstration purposes.

    Builds up to max_results demo candidates from a fixed roster: roster
    entries are copied while they overlap the job's key terms OR while the
    quota is still unfilled, with headlines tweaked to echo the job
    description ("Senior"/"Python"/"React" cues). If the roster runs short,
    name/headline variations of roster entries are synthesized with
    deterministic fake profile URLs.

    Args:
        job_description: Used for keyword matching and headline tweaks.
        location: Overrides each sample profile's location when given.
        max_results: Upper bound on the number of profiles returned.

    Returns:
        List of candidate dicts with keys: name, headline, location,
        profile_url, company, education, experience_summary.
    """
    # Extract key terms for more relevant sample profiles
    key_terms = self._extract_key_terms(job_description)
    # Fixed roster of plausible software-engineering profiles.
    sample_data = [
        {
            "name": "Sarah Chen",
            "headline": "Senior Software Engineer at TechCorp",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/sarah-chen-123456",
            "company": "TechCorp",
            "education": "Stanford University - Master of Science, Computer Science",
            "experience_summary": "5+ years building scalable web applications with Python, React, and AWS. Led development of microservices architecture serving 1M+ users."
        },
        {
            "name": "Michael Rodriguez",
            "headline": "Full Stack Developer | Python | React | Node.js",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/michael-rodriguez-789012",
            "company": "StartupXYZ",
            "education": "UC Berkeley - Bachelor of Science, Software Engineering",
            "experience_summary": "Experienced full-stack developer with expertise in modern web technologies. Built and deployed applications using Python, React, and cloud platforms."
        },
        {
            "name": "Emily Johnson",
            "headline": "Software Engineer | Backend Development | Python",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/emily-johnson-345678",
            "company": "DataFlow Inc",
            "education": "MIT - Master of Science, Computer Science",
            "experience_summary": "Backend engineer specializing in Python development, database design, and API development. Experience with Django, Flask, and PostgreSQL."
        },
        {
            "name": "David Kim",
            "headline": "Senior Developer | React | Python | DevOps",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/david-kim-901234",
            "company": "CloudTech Solutions",
            "education": "University of Washington - Bachelor of Science, Computer Science",
            "experience_summary": "Full-stack developer with 6+ years experience in React, Python, and cloud infrastructure. Led multiple successful product launches."
        },
        {
            "name": "Lisa Wang",
            "headline": "Software Engineer | Frontend Specialist | React",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/lisa-wang-567890",
            "company": "WebFlow",
            "education": "University of Michigan - Bachelor of Engineering, Computer Engineering",
            "experience_summary": "Frontend engineer passionate about creating intuitive user experiences. Expert in React, TypeScript, and modern CSS frameworks."
        },
        {
            "name": "Alex Thompson",
            "headline": "Principal Software Engineer | System Architecture",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/alex-thompson-111111",
            "company": "EnterpriseTech",
            "education": "Carnegie Mellon University - Master of Science, Computer Science",
            "experience_summary": "Principal engineer with 8+ years designing and implementing large-scale distributed systems. Expert in microservices, cloud architecture, and performance optimization."
        },
        {
            "name": "Maria Garcia",
            "headline": "Senior Backend Engineer | Python | Go | Microservices",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/maria-garcia-222222",
            "company": "ScaleUp Inc",
            "education": "University of Texas - Bachelor of Science, Computer Science",
            "experience_summary": "Backend specialist with expertise in high-performance systems, database optimization, and API design. Led teams building services handling millions of requests daily."
        },
        {
            "name": "James Wilson",
            "headline": "Full Stack Lead Developer | React | Node.js | AWS",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/james-wilson-333333",
            "company": "Digital Solutions",
            "education": "Georgia Tech - Bachelor of Science, Computer Engineering",
            "experience_summary": "Lead developer with 7+ years experience in modern web development. Expert in React ecosystem, Node.js backend development, and AWS cloud infrastructure."
        },
        {
            "name": "Sophie Brown",
            "headline": "Software Engineer | Machine Learning | Python",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/sophie-brown-444444",
            "company": "AI Innovations",
            "education": "University of California - Master of Science, Data Science",
            "experience_summary": "ML engineer specializing in Python, TensorFlow, and PyTorch. Experience building recommendation systems and natural language processing applications."
        },
        {
            "name": "Ryan Davis",
            "headline": "DevOps Engineer | Kubernetes | Docker | CI/CD",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/ryan-davis-555555",
            "company": "CloudFirst",
            "education": "University of Illinois - Bachelor of Science, Computer Science",
            "experience_summary": "DevOps specialist with expertise in containerization, orchestration, and automation. Led infrastructure teams managing production environments."
        },
        {
            "name": "Jennifer Lee",
            "headline": "Senior Frontend Engineer | React | TypeScript | UX",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/jennifer-lee-666666",
            "company": "UserExperience Pro",
            "education": "University of Washington - Master of Science, Human Computer Interaction",
            "experience_summary": "Frontend engineer passionate about user experience and accessibility. Expert in React, TypeScript, and modern frontend architecture patterns."
        },
        {
            "name": "Carlos Martinez",
            "headline": "Software Architect | System Design | Java | Spring",
            "location": location or "San Francisco, CA",
            "profile_url": "https://linkedin.com/in/carlos-martinez-777777",
            "company": "Enterprise Systems",
            "education": "University of California - Master of Science, Software Engineering",
            "experience_summary": "Software architect with 10+ years designing enterprise systems. Expert in Java ecosystem, Spring framework, and scalable system architecture."
        }
    ]
    # Filter and customize based on job description keywords
    relevant_profiles = []
    job_desc_lower = job_description.lower()
    for profile in sample_data:
        # Relevance = count of key terms appearing in headline + summary.
        profile_text = f"{profile['headline']} {profile['experience_summary']}".lower()
        relevance_score = 0
        for term in key_terms:
            if term in profile_text:
                relevance_score += 1
        # Keep relevant profiles; also keep irrelevant ones while the quota
        # is unfilled so we never return fewer than max_results candidates
        # when the roster can cover it.
        if relevance_score > 0 or len(relevant_profiles) < max_results:
            # Customize profile based on job description
            customized_profile = profile.copy()
            # Echo seniority / stack cues from the job description in the
            # headline when the profile doesn't already mention them.
            if "senior" in job_desc_lower and "senior" not in profile['headline'].lower():
                customized_profile['headline'] = f"Senior {profile['headline']}"
            if "python" in job_desc_lower and "python" not in profile['headline'].lower():
                customized_profile['headline'] += " | Python"
            if "react" in job_desc_lower and "react" not in profile['headline'].lower():
                customized_profile['headline'] += " | React"
            relevant_profiles.append(customized_profile)
    # If we still need more profiles, create additional ones
    while len(relevant_profiles) < max_results:
        # Cycle through the roster, giving each clone a new first name,
        # a key-term-based headline, and a deterministic fake URL.
        base_profile = sample_data[len(relevant_profiles) % len(sample_data)].copy()
        # Create variations
        variations = [
            {"name": f"Alex {base_profile['name'].split()[1]}", "headline": f"Software Engineer | {key_terms[0] if key_terms else 'Development'}"},
            {"name": f"Jordan {base_profile['name'].split()[1]}", "headline": f"Full Stack Developer | {key_terms[0] if key_terms else 'Web Development'}"},
            {"name": f"Taylor {base_profile['name'].split()[1]}", "headline": f"Backend Engineer | {key_terms[0] if key_terms else 'API Development'}"},
            {"name": f"Casey {base_profile['name'].split()[1]}", "headline": f"Frontend Developer | {key_terms[0] if key_terms else 'UI/UX'}"},
            {"name": f"Riley {base_profile['name'].split()[1]}", "headline": f"DevOps Engineer | {key_terms[0] if key_terms else 'Infrastructure'}"}
        ]
        variation = variations[len(relevant_profiles) % len(variations)]
        new_profile = base_profile.copy()
        new_profile.update(variation)
        new_profile["profile_url"] = f"https://linkedin.com/in/{new_profile['name'].lower().replace(' ', '-')}-{len(relevant_profiles):06d}"
        relevant_profiles.append(new_profile)
    # Return up to max_results
    return relevant_profiles[:max_results]
def _build_multiple_search_queries(self, job_description: str, location: Optional[str] = None) -> List[str]:
"""Build multiple search queries for better coverage, targeting About section and summary."""
logger.info("πŸ”§ Extracting key terms from job description...")
key_terms = self._extract_key_terms(job_description)
logger.info(f"πŸ“‹ Extracted key terms: {key_terms}")
queries = []
# Query 1: Basic profile search
query_parts = ["site:linkedin.com/in/", "profile"] + key_terms
if location:
query_parts.append(location)
queries.append(" ".join(query_parts))
logger.info(f"πŸ“ Query 1 (Basic): {' '.join(query_parts)}")
# Query 2: Experience-focused search
query_parts = ["site:linkedin.com/in/", "experience"] + key_terms
if location:
query_parts.append(location)
queries.append(" ".join(query_parts))
logger.info(f"πŸ“ Query 2 (Experience): {' '.join(query_parts)}")
# Query 3: Company-focused search
query_parts = ["site:linkedin.com/in/", "company"] + key_terms
if location:
query_parts.append(location)
queries.append(" ".join(query_parts))
logger.info(f"πŸ“ Query 3 (Company): {' '.join(query_parts)}")
# Query 4: About section search
query_parts = ["site:linkedin.com/in/", "about"] + key_terms
if location:
query_parts.append(location)
queries.append(" ".join(query_parts))
logger.info(f"πŸ“ Query 4 (About): {' '.join(query_parts)}")
# Query 5: Summary section search
query_parts = ["site:linkedin.com/in/", "summary"] + key_terms
if location:
query_parts.append(location)
queries.append(" ".join(query_parts))
logger.info(f"πŸ“ Query 5 (Summary): {' '.join(query_parts)}")
# Query 6: Natural language query to encourage About section in snippet
nl_query = f"site:linkedin.com/in/ About section {job_description} {location or ''}"
queries.append(nl_query.strip())
logger.info(f"πŸ“ Query 6 (Natural Language): {nl_query[:80]}...")
# Query 7: Bio section search
query_parts = ["site:linkedin.com/in/", "bio"] + key_terms
if location:
query_parts.append(location)
queries.append(" ".join(query_parts))
logger.info(f"πŸ“ Query 7 (Bio): {' '.join(query_parts)}")
return queries
def _deduplicate_search_results(self, search_results: List[Dict]) -> List[Dict]:
"""Remove duplicate search results based on profile URL"""
seen_urls = set()
unique_results = []
for result in search_results:
profile_url = self._extract_linkedin_url(result.get('link', ''))
if profile_url and profile_url not in seen_urls:
seen_urls.add(profile_url)
unique_results.append(result)
return unique_results
def _extract_key_terms(self, job_description: str) -> List[str]:
"""Extract key terms from job description for search optimization"""
# Common job-related keywords to focus on
job_keywords = [
"software engineer", "developer", "programmer", "engineer",
"manager", "director", "lead", "senior", "principal",
"full stack", "frontend", "backend", "devops", "data",
"machine learning", "AI", "artificial intelligence",
"python", "javascript", "java", "react", "node.js"
]
# Extract matching keywords from job description
found_keywords = []
job_desc_lower = job_description.lower()
for keyword in job_keywords:
if keyword in job_desc_lower:
found_keywords.append(keyword)
# If no specific keywords found, use general terms
if not found_keywords:
found_keywords = ["professional", "experience"]
return found_keywords[:3] # Limit to top 3 keywords
def _perform_google_search(self, query: str, max_results: int) -> List[Dict]:
"""Perform Google Custom Search API request"""
logger.info(f"🌐 Starting Google search for: {query[:60]}...")
# Check cache first for this specific query
cached_results = self.cache_service.get_query_results(query, max_results)
if cached_results:
logger.info(f"🎯 Returning {len(cached_results)} cached query results")
return cached_results
results = []
# Ensure we always make at least one request
if max_results <= 0:
max_results = 1
# Google CSE returns max 10 results per request
# Calculate how many requests we need
num_requests = max(1, min(5, (max_results + 9) // 10)) # At least 1 request, max 5
logger.info(f"πŸ“Š Will make {num_requests} API requests (max 10 results per request)")
for i in range(num_requests):
start_index = i * 10 + 1
# Calculate how many results to request for this specific request
results_per_request = min(10, max_results - i * 10)
# Ensure we request at least 1 result
if results_per_request <= 0:
results_per_request = 1
logger.info(f"πŸ” API request {i+1}/{num_requests} (start index: {start_index}, results: {results_per_request})")
params = {
'key': self.api_key,
'cx': self.cse_id,
'q': query,
'start': start_index,
'num': results_per_request
}
try:
logger.info(f"πŸ“‘ Making API request to Google Custom Search...")
response = requests.get(self.base_url, params=params)
response.raise_for_status()
data = response.json()
if 'items' in data:
results.extend(data['items'])
logger.info(f"βœ… Request {i+1} successful: got {len(data['items'])} results")
else:
logger.warning(f"⚠️ Request {i+1} returned no items")
# Rate limiting
if i < num_requests - 1: # Don't delay after last request
logger.info(f"⏳ Rate limiting: waiting {self.delay}s before next request...")
time.sleep(self.delay)
except requests.exceptions.RequestException as e:
logger.error(f"❌ Google search request {i+1} failed: {str(e)}")
break
except Exception as e:
logger.error(f"❌ Error processing search results for request {i+1}: {str(e)}")
break
logger.info(f"πŸ“Š Google search completed: {len(results)} total results")
# Cache the results
self.cache_service.set_query_results(query, max_results, results)
return results
def _extract_profile_data(self, search_results: List[Dict]) -> List[Dict]:
"""Extract and parse LinkedIn profile data from search results"""
logger.info(f"πŸ”§ Starting profile data extraction for {len(search_results)} search results")
candidates = []
for i, result in enumerate(search_results, 1):
try:
logger.info(f"πŸ“‹ Processing result {i}/{len(search_results)}")
# Extract LinkedIn URL
profile_url = self._extract_linkedin_url(result.get('link', ''))
if not profile_url:
logger.warning(f"⚠️ Result {i}: Not a valid LinkedIn URL, skipping")
continue
logger.info(f"πŸ”— Result {i}: Valid LinkedIn URL found: {profile_url}")
# Extract profile information from snippet
snippet = result.get('snippet', '')
title = result.get('title', '')
logger.info(f"πŸ“„ Result {i}: Title: {title[:60]}...")
logger.info(f"πŸ“„ Result {i}: Snippet length: {len(snippet)} characters")
# Try to get more detailed information by scraping the profile
logger.info(f"🌐 Result {i}: Attempting to scrape profile for detailed data...")
detailed_data = self._scrape_linkedin_profile(profile_url)
if detailed_data.get('success'):
logger.info(f"βœ… Result {i}: Profile scraping successful")
else:
logger.warning(f"⚠️ Result {i}: Profile scraping failed: {detailed_data.get('error', 'Unknown error')}")
# Parse basic profile data
logger.info(f"πŸ”§ Result {i}: Parsing profile data...")
profile_data = self._parse_profile_snippet(title, snippet, profile_url, detailed_data)
if profile_data:
candidates.append(profile_data)
logger.info(f"βœ… Result {i}: Profile data extracted successfully")
logger.info(f" πŸ‘€ Name: {profile_data.get('name', 'Unknown')}")
logger.info(f" πŸ’Ό Company: {profile_data.get('company', 'Unknown')}")
logger.info(f" πŸŽ“ Education: {profile_data.get('education', 'Unknown')}")
else:
logger.warning(f"⚠️ Result {i}: Failed to extract profile data")
except Exception as e:
logger.warning(f"❌ Error parsing profile data for result {i}: {str(e)}")
continue
logger.info(f"πŸŽ‰ Profile data extraction completed: {len(candidates)} successful extractions")
return candidates
def _scrape_linkedin_profile(self, profile_url: str) -> Dict:
    """Attempt to scrape LinkedIn profile for more detailed information, including About section.

    Best effort: LinkedIn frequently blocks anonymous scraping, so a
    non-200 status or any exception yields a {'success': False, ...} dict
    rather than raising. Successful scrapes are cached per URL.

    Args:
        profile_url: Cleaned LinkedIn profile URL.

    Returns:
        On success: dict with structured_data, text_content (truncated to
        2000 chars), about_section, education, success=True.
        On failure: {'success': False} plus either 'status_code' or 'error'.
    """
    logger.info(f"🌐 Scraping LinkedIn profile: {profile_url}")
    # Check cache first for this profile
    cached_profile_data = self.cache_service.get_profile_data(profile_url)
    if cached_profile_data:
        logger.info(f"🎯 Returning cached profile data for: {profile_url}")
        return cached_profile_data
    try:
        logger.info(f"πŸ“‘ Making HTTP request to LinkedIn...")
        # Uses the browser-header session from __init__ to reduce blocking.
        response = self.session.get(profile_url, timeout=10)
        if response.status_code == 200:
            logger.info(f"βœ… HTTP request successful (status: {response.status_code})")
            soup = BeautifulSoup(response.content, 'html.parser')
            logger.info(f"πŸ”§ Extracting structured data...")
            structured_data = self._extract_structured_data(soup)
            logger.info(f"πŸ“Š Found {len(structured_data)} structured data fields")
            logger.info(f"πŸ“„ Extracting text content...")
            text_content = soup.get_text()
            logger.info(f"πŸ“Š Text content length: {len(text_content)} characters")
            # Try to extract About section
            logger.info(f"πŸ“ Attempting to extract About section...")
            about_section = self._extract_about_section(soup, text_content)
            if about_section:
                logger.info(f"βœ… About section found: {len(about_section)} characters")
            else:
                logger.warning(f"⚠️ About section not found")
            # Try to extract education information
            logger.info(f"πŸŽ“ Attempting to extract education information...")
            education = self._extract_education_from_linkedin_profile(soup)
            if education:
                logger.info(f"βœ… Education found: {education}")
                # Mirror education under the schema.org key so downstream
                # consumers of structured_data can find it too.
                structured_data['alumniOf'] = education
            else:
                logger.warning(f"⚠️ Education not found")
            profile_data = {
                'structured_data': structured_data,
                'text_content': text_content[:2000],  # Limit content length
                'about_section': about_section,
                'education': education,
                'success': True
            }
            # Cache the profile data
            self.cache_service.set_profile_data(profile_url, profile_data)
            return profile_data
        else:
            # Non-200 (commonly 999/403 from LinkedIn anti-bot) — report it.
            logger.warning(f"⚠️ HTTP request failed (status: {response.status_code})")
            return {'success': False, 'status_code': response.status_code}
    except Exception as e:
        logger.warning(f"❌ Failed to scrape LinkedIn profile {profile_url}: {str(e)}")
        return {'success': False, 'error': str(e)}
def _extract_structured_data(self, soup: BeautifulSoup) -> Dict:
    """Collect JSON-LD objects and profile-related meta tags from a page.

    Returns a flat dict merging every JSON-LD payload that parses to a
    dict, plus the description / keywords / og:title / og:description
    meta tag contents. Extraction errors are logged, not raised.
    """
    collected = {}
    try:
        # Merge every well-formed JSON-LD <script> whose payload is a dict.
        for script in soup.find_all('script', type='application/ld+json'):
            payload = getattr(script, 'string', None)
            if not payload:
                continue
            try:
                parsed = json.loads(payload)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                collected.update(parsed)
        # Pull profile-related meta tag contents.
        wanted = {
            'description': soup.find('meta', attrs={'name': 'description'}),
            'keywords': soup.find('meta', attrs={'name': 'keywords'}),
            'og:title': soup.find('meta', attrs={'property': 'og:title'}),
            'og:description': soup.find('meta', attrs={'property': 'og:description'}),
        }
        for field, tag in wanted.items():
            if tag and hasattr(tag, 'attrs'):
                tag_attrs = getattr(tag, 'attrs', {})
                if 'content' in tag_attrs:
                    collected[field] = tag_attrs['content']
    except Exception as e:
        logger.warning(f"Error extracting structured data: {str(e)}")
    return collected
def _extract_about_section(self, soup: BeautifulSoup, text_content: str) -> Optional[str]:
    """Try to extract the About section from the LinkedIn profile HTML or text.

    Heuristic cascade, first hit wins:
      1. An 'About'/'Summary'/'Bio' heading's next sibling, else its parent
         (when the parent text exceeds 30 chars).
      2. A regex over the flattened page text ("About" + 30-600 chars).
      3. The page's meta description, then og:description.

    Args:
        soup: Parsed profile page.
        text_content: soup.get_text() of the same page.

    Returns:
        The About text, or None when every heuristic fails.
    """
    logger.info(f"πŸ” Looking for About section in HTML...")
    about = None
    # Look for headings like 'About' or 'Summary'
    logger.info(f"πŸ” Searching for About/Summary headings...")
    for heading in soup.find_all(['h2', 'h3', 'span']):
        heading_text = heading.get_text(strip=True).lower()
        if heading_text in ['about', 'summary', 'bio']:
            logger.info(f"βœ… Found heading: '{heading_text}'")
            # The About section is often in the next sibling or parent
            next_elem = heading.find_next_sibling()
            if next_elem and hasattr(next_elem, 'get_text') and next_elem.get_text(strip=True):
                about = next_elem.get_text(strip=True)
                logger.info(f"βœ… Found About section in next sibling: {len(about)} characters")
                break
            # Parent fallback: >30 chars guards against grabbing just the
            # heading's own label text.
            parent = heading.parent
            if parent and hasattr(parent, 'get_text') and parent.get_text(strip=True) and len(parent.get_text(strip=True)) > 30:
                about = parent.get_text(strip=True)
                logger.info(f"βœ… Found About section in parent: {len(about)} characters")
                break
    # Fallback: look for 'About' in text content
    if not about and text_content:
        logger.info(f"πŸ” Searching for 'About' pattern in text content...")
        # Requires a capitalized sentence start; caps length at 600 chars.
        match = re.search(r'About[\s\n]+([A-Z][^\n]{30,600})', text_content)
        if match:
            about = match.group(1).strip()
            logger.info(f"βœ… Found About section via regex: {len(about)} characters")
    # Fallback: use meta description or og:description
    if not about:
        logger.info(f"πŸ” Looking for meta description...")
        meta_desc = soup.find('meta', attrs={'name': 'description'})
        if meta_desc and hasattr(meta_desc, 'attrs'):
            meta_attrs = getattr(meta_desc, 'attrs', {})
            if 'content' in meta_attrs:
                about = meta_attrs['content']
                logger.info(f"βœ… Found About section in meta description: {len(about) if about else 0} characters")
    if not about:
        logger.info(f"πŸ” Looking for og:description...")
        og_desc = soup.find('meta', attrs={'property': 'og:description'})
        if og_desc and hasattr(og_desc, 'attrs'):
            og_attrs = getattr(og_desc, 'attrs', {})
            if 'content' in og_attrs:
                about = og_attrs['content']
                logger.info(f"βœ… Found About section in og:description: {len(about) if about else 0} characters")
    # Meta 'content' attrs can be lists in bs4; coerce to str for callers.
    if about and not isinstance(about, str):
        about = str(about)
    if about:
        logger.info(f"βœ… About section extraction successful: {len(about)} characters")
    else:
        logger.warning(f"⚠️ About section extraction failed")
    return about if isinstance(about, str) else None
def _extract_linkedin_url(self, url: str) -> Optional[str]:
"""Extract and validate LinkedIn profile URL"""
if not url:
return None
# Check if it's a LinkedIn profile URL
if 'linkedin.com/in/' in url:
# Clean up the URL
clean_url = url.split('?')[0] # Remove query parameters
return clean_url
return None
def _parse_profile_snippet(self, title: str, snippet: str, profile_url: str, detailed_data: Optional[Dict] = None) -> Optional[Dict]:
    """Parse LinkedIn profile information from search result snippet and detailed data.

    Field precedence: company comes from the title, then the snippet, then
    the scraped detail data; education comes from the snippet, then the
    scraper's own extraction, then other detail-data heuristics. Missing
    name/headline/location fall back to 'Unknown'/'Professional'/'Unknown'.

    Args:
        title: Search-result title line.
        snippet: Search-result snippet text.
        profile_url: Cleaned LinkedIn profile URL.
        detailed_data: Optional output of _scrape_linkedin_profile; only
            consulted when its 'success' flag is truthy.

    Returns:
        Candidate dict (name, headline, location, profile_url, company,
        education, experience_summary), or None if parsing raised.
    """
    try:
        logger.info(f"πŸ”§ Parsing profile data from title and snippet...")
        # Extract name from title (usually "Name | Headline | Location")
        logger.info(f"πŸ‘€ Extracting name from title...")
        name = self._extract_name_from_title(title)
        logger.info(f"βœ… Extracted name: {name}")
        # Extract headline and location from snippet
        logger.info(f"πŸ’Ό Extracting headline and location...")
        headline, location = self._extract_headline_and_location(snippet)
        logger.info(f"βœ… Extracted headline: {headline}")
        logger.info(f"βœ… Extracted location: {location}")
        # Company: title β†’ snippet β†’ scraped detail data.
        logger.info(f"🏒 Extracting company information...")
        company = self._extract_company_from_title(title)
        if company:
            logger.info(f"βœ… Found company in title: {company}")
        else:
            logger.info(f"πŸ” Company not found in title, checking snippet...")
            company = self._extract_company_from_snippet(snippet)
            if company:
                logger.info(f"βœ… Found company in snippet: {company}")
            else:
                logger.info(f"πŸ” Company not found in snippet, checking detailed data...")
                if detailed_data and detailed_data.get('success'):
                    company = self._extract_company_from_detailed_data(detailed_data)
                    if company:
                        logger.info(f"βœ… Found company in detailed data: {company}")
                    else:
                        logger.warning(f"⚠️ Company not found in any source")
                else:
                    logger.warning(f"⚠️ Company not found in any source")
        # Education: snippet β†’ scraper's result β†’ detail-data heuristics.
        logger.info(f"πŸŽ“ Extracting education information...")
        education = self._extract_education_from_snippet(snippet)
        if education:
            logger.info(f"βœ… Found education in snippet: {education}")
        else:
            logger.info(f"πŸ” Education not found in snippet, checking detailed data...")
            if detailed_data and detailed_data.get('success'):
                # Prefer the value the scraper already extracted.
                scraped_education = detailed_data.get('education')
                if scraped_education:
                    logger.info(f"βœ… Found education from scraping: {scraped_education}")
                    education = scraped_education
                else:
                    # Fallback to other extraction methods
                    education = self._extract_education_from_detailed_data(detailed_data)
                    if education:
                        logger.info(f"βœ… Found education in detailed data: {education}")
                    else:
                        logger.warning(f"⚠️ Education not found in any source")
            else:
                logger.warning(f"⚠️ Education not found in any source")
        # Create better experience summary
        logger.info(f"πŸ“ Creating experience summary...")
        experience_summary = self._create_experience_summary(snippet, detailed_data)
        logger.info(f"βœ… Experience summary created: {len(experience_summary)} characters")
        # Create candidate profile
        candidate = {
            'name': name or 'Unknown',
            'headline': headline or 'Professional',
            'location': location or 'Unknown',
            'profile_url': profile_url,
            'company': company,
            'education': education,
            'experience_summary': experience_summary
        }
        logger.info(f"βœ… Profile parsing completed successfully")
        return candidate
    except Exception as e:
        # Any parse failure skips this candidate rather than aborting the batch.
        logger.warning(f"❌ Error parsing profile snippet: {str(e)}")
        return None
def _extract_name_from_title(self, title: str) -> str:
"""Extract name from LinkedIn profile title"""
if not title:
return 'Unknown'
# LinkedIn titles are usually "Name | Headline | Location" or "Name - Headline at Company"
# First, try to extract just the name part
name_patterns = [
r'^([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*?)(?:\s*[-|]\s*)', # Name followed by - or |
r'^([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*?)(?:\s+at\s+)', # Name followed by "at"
r'^([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*?)(?:\s*,\s*)', # Name followed by comma
]
for pattern in name_patterns:
match = re.match(pattern, title)
if match:
name = match.group(1).strip()
# Clean up common LinkedIn prefixes
name = name.replace('LinkedIn', '').strip()
if name and len(name) > 2:
return name
# Fallback: take first part before any separator
parts = re.split(r'[-|,]\s*', title)
if parts:
name = parts[0].strip()
# Clean up common LinkedIn prefixes
name = name.replace('LinkedIn', '').strip()
return name if name else 'Unknown'
return 'Unknown'
def _extract_headline_and_location(self, snippet: str) -> tuple:
"""Extract headline and location from snippet"""
headline = 'Professional'
location = 'Unknown'
if not snippet:
return headline, location
# Look for location patterns (City, State or Country)
location_patterns = [
r'([A-Z][a-z]+(?:[\s,]+[A-Z][a-z]+)*)\s*,\s*([A-Z]{2})', # City, State
r'([A-Z][a-z]+(?:[\s,]+[A-Z][a-z]+)*)\s*,\s*([A-Z][a-z]+)', # City, Country
]
for pattern in location_patterns:
match = re.search(pattern, snippet)
if match:
location = f"{match.group(1)}, {match.group(2)}"
break
# Extract headline (usually contains job title) - improved logic
lines = snippet.split('.')
for line in lines:
line = line.strip()
# Look for job title patterns
if any(keyword in line.lower() for keyword in ['engineer', 'developer', 'manager', 'director', 'lead', 'senior', 'principal', 'architect']):
# Filter out lines that are likely names
if not self._is_likely_name(line) and len(line) > 5:
headline = line
break
return headline, location
def _extract_company_from_title(self, title: str) -> Optional[str]:
"""Extract company name from LinkedIn profile title"""
if not title:
return None
# LinkedIn titles are usually "Name | Headline | Location" or "Name - Headline at Company"
# Look for company patterns in title
company_patterns = [
r'at\s+([A-Z][a-zA-Z\s&\.]+?)(?:\s|$|\(|\))',
r'-\s+([A-Z][a-zA-Z\s&\.]+?)\s+at\s+',
r'\|\s+([A-Z][a-zA-Z\s&\.]+?)\s+at\s+',
r'-\s+[^-]+-\s+([A-Z][a-zA-Z\s&\.]+?)(?:\s|$|\(|\))', # Third part after two dashes
r'\|\s+[^|]+\|\s+([A-Z][a-zA-Z\s&\.]+?)(?:\s|$|\(|\))', # Third part after two pipes
]
for pattern in company_patterns:
match = re.search(pattern, title)
if match:
company = match.group(1).strip()
if (len(company) > 2 and len(company) < 50 and
company.lower() not in ['linkedin', 'profile', 'view', 'professional', 'experience'] and
not self._is_likely_name(company)):
return company
# Try to extract from the last part of the title (after last separator)
parts = re.split(r'[-|]\s*', title)
if len(parts) >= 3:
last_part = parts[-1].strip()
# Check if the last part looks like a company name
if (len(last_part) > 3 and len(last_part) < 50 and
not self._is_likely_name(last_part) and
last_part.lower() not in ['linkedin', 'profile', 'view', 'professional', 'experience']):
return last_part
return None
def _extract_company_from_snippet(self, snippet: str) -> Optional[str]:
"""Extract company name from snippet"""
if not snippet:
return None
# Look for company patterns - more comprehensive patterns
company_patterns = [
r'at\s+([A-Z][a-zA-Z\s&\.]+?)(?:\s|,|\.|$|\(|\))',
r'([A-Z][a-zA-Z\s&\.]+?)\s+β€’\s+',
r'([A-Z][a-zA-Z\s&\.]+?)\s+-\s+',
r'([A-Z][a-zA-Z\s&\.]+?)\s+\(',
r'([A-Z][a-zA-Z\s&\.]+?)\s+at\s+',
r'([A-Z][a-zA-Z\s&\.]+?)\s+Software\s+Engineer',
r'([A-Z][a-zA-Z\s&\.]+?)\s+Senior\s+',
r'([A-Z][a-zA-Z\s&\.]+?)\s+Developer',
]
for pattern in company_patterns:
match = re.search(pattern, snippet)
if match:
company = match.group(1).strip()
# Filter out common false positives and names
if (len(company) > 2 and len(company) < 50 and
company.lower() not in ['linkedin', 'profile', 'view', 'professional', 'experience'] and
not self._is_likely_name(company)):
return company
return None
def _is_likely_name(self, text: str) -> bool:
"""Check if text is likely a person's name rather than a company"""
# Common name patterns
name_indicators = [
r'^[A-Z][a-z]+\s+[A-Z][a-z]+$', # First Last
r'^[A-Z][a-z]+\s+[A-Z][a-z]+\s+[A-Z][a-z]+$', # First Middle Last
r'^[A-Z][a-z]+\.\s+[A-Z][a-z]+$', # F. Last
]
for pattern in name_indicators:
if re.match(pattern, text):
return True
# Check for common name words
common_names = ['michael', 'john', 'david', 'james', 'robert', 'mary', 'jennifer', 'lisa', 'sarah']
if text.lower() in common_names:
return True
return False
def _extract_company_from_detailed_data(self, detailed_data: Dict) -> Optional[str]:
"""Extract company from detailed scraped data"""
try:
# Check structured data first
structured_data = detailed_data.get('structured_data', {})
# Look for organization information in structured data
if 'worksFor' in structured_data:
return structured_data['worksFor']
# Check meta description for company info
description = structured_data.get('description', '')
if description:
company_match = re.search(r'at\s+([A-Z][a-zA-Z\s&]+?)(?:\s|,|\.|$)', description)
if company_match:
return company_match.group(1).strip()
# Check text content
text_content = detailed_data.get('text_content', '')
if text_content:
return self._extract_company_from_snippet(text_content)
except Exception as e:
logger.warning(f"Error extracting company from detailed data: {str(e)}")
return None
def _extract_education_from_snippet(self, snippet: str) -> Optional[str]:
"""Extract education information from snippet using improved patterns"""
if not snippet:
return None
# Look for education patterns - more comprehensive and specific to LinkedIn structure
education_patterns = [
# Pattern for "Education: University Name" format (from top card)
r'Education:\s*([A-Z][a-zA-Z\s&\.]+?(?:University|College|School|Institute|Academy))',
# Pattern for degree + university format
r'(?:Bachelor|Master|PhD|BSc|MSc|MBA|BS|MS)\s+(?:of|in|from)\s+([A-Z][a-zA-Z\s&\.]+?(?:University|College|School|Institute))',
# Pattern for university name followed by degree
r'([A-Z][a-zA-Z\s&\.]+?(?:University|College|School|Institute)).*?(?:Bachelor|Master|PhD|BSc|MSc|MBA|BS|MS)',
# Pattern for "Studied at" format
r'(?:Studied|Graduated|Attended)\s+(?:from|at)\s+([A-Z][a-zA-Z\s&\.]+?(?:University|College|School|Institute))',
# Pattern for university name with degree in parentheses
r'([A-Z][a-zA-Z\s&\.]+?(?:University|College|School|Institute)).*?\((?:Bachelor|Master|PhD|BSc|MSc|MBA|BS|MS)',
# Pattern for degree, field format
r'(?:Bachelor|Master|PhD|BSc|MSc|MBA|BS|MS),\s*[A-Za-z\s/]+(?:from|at)\s+([A-Z][a-zA-Z\s&\.]+?(?:University|College|School|Institute))',
]
for pattern in education_patterns:
match = re.search(pattern, snippet, re.IGNORECASE)
if match:
education = match.group(1).strip()
if len(education) > 3 and len(education) < 100:
# Clean up the education string
education = re.sub(r'\s+', ' ', education) # Remove extra whitespace
education = education.strip()
return education
# Fallback to keyword-based search with better context
education_keywords = ['university', 'college', 'school', 'institute', 'bachelor', 'master', 'phd', 'degree']
for keyword in education_keywords:
if keyword in snippet.lower():
# Find the sentence containing education info
sentences = snippet.split('.')
for sentence in sentences:
if keyword in sentence.lower():
# Extract university name from the sentence
university_match = re.search(r'([A-Z][a-zA-Z\s&\.]+?(?:University|College|School|Institute))', sentence)
if university_match:
return university_match.group(1).strip()
# If no university found, return the sentence itself
return sentence.strip()
return None
def _extract_education_from_detailed_data(self, detailed_data: Dict) -> Optional[str]:
"""Extract education from detailed scraped data using LinkedIn-specific selectors"""
try:
# Check structured data first
structured_data = detailed_data.get('structured_data', {})
# Look for education information in structured data
if 'alumniOf' in structured_data:
return structured_data['alumniOf']
# Check meta description for education info
description = structured_data.get('description', '')
if description:
education_match = re.search(r'([A-Z][a-zA-Z\s&]+?(?:university|college|school|institute))', description, re.IGNORECASE)
if education_match:
return education_match.group(1).strip()
# Check text content for LinkedIn-specific education patterns
text_content = detailed_data.get('text_content', '')
if text_content:
# Look for education section specifically
education_section = self._extract_education_section_from_html(text_content)
if education_section:
return education_section
# Fallback to snippet extraction
return self._extract_education_from_snippet(text_content)
except Exception as e:
logger.warning(f"Error extracting education from detailed data: {str(e)}")
return None
def _extract_education_section_from_html(self, html_content: str) -> Optional[str]:
"""Extract education information from LinkedIn HTML structure"""
try:
# Look for education section using LinkedIn-specific patterns
education_patterns = [
# Pattern for education section header
r'<h2[^>]*>.*?Education.*?</h2>.*?<span[^>]*>([^<]+(?:University|College|School|Institute)[^<]*)</span>',
# Pattern for education in top card
r'aria-label="Education:\s*([^"]+(?:University|College|School|Institute)[^"]*)"',
# Pattern for education list items
r'<li[^>]*>.*?<span[^>]*>([^<]+(?:University|College|School|Institute)[^<]*)</span>',
# Pattern for education in bold text
r'<span[^>]*class="[^"]*t-bold[^"]*"[^>]*>([^<]+(?:University|College|School|Institute)[^<]*)</span>',
]
for pattern in education_patterns:
match = re.search(pattern, html_content, re.IGNORECASE | re.DOTALL)
if match:
education = match.group(1).strip()
if len(education) > 3 and len(education) < 100:
# Clean up the education string
education = re.sub(r'\s+', ' ', education) # Remove extra whitespace
education = education.strip()
return education
# Look for education keywords in the HTML
if 'education' in html_content.lower():
# Find the section containing education
lines = html_content.split('\n')
for i, line in enumerate(lines):
if 'education' in line.lower() and 'university' in line.lower():
# Extract university name from this line or nearby lines
university_match = re.search(r'([A-Z][a-zA-Z\s&\.]+?(?:University|College|School|Institute))', line)
if university_match:
return university_match.group(1).strip()
# Check next few lines for university name
for j in range(i+1, min(i+5, len(lines))):
university_match = re.search(r'([A-Z][a-zA-Z\s&\.]+?(?:University|College|School|Institute))', lines[j])
if university_match:
return university_match.group(1).strip()
except Exception as e:
logger.warning(f"Error extracting education from HTML: {str(e)}")
return None
def _create_experience_summary(self, snippet: str, detailed_data: Optional[Dict] = None) -> str:
    """Create a better experience summary from available data, prioritizing the About section.

    Source preference: (1) the scraped About section when it is present and
    longer than 30 chars; (2) keyword-relevant sentences from the snippet
    plus structured-data facts (jobTitle/worksFor/alumniOf); (3) the first
    keyword-bearing sentence of the scraped page text; (4) the first 20
    words of the snippet. Summaries are capped at 400 characters.

    Args:
        snippet: Search-result snippet text (may be empty).
        detailed_data: Optional scrape result; only used when its
            'success' flag is truthy.

    Returns:
        A non-empty summary string; a fixed placeholder when no data at
        all is available.
    """
    logger.info(f"πŸ“ Creating experience summary...")
    # Use About section if available
    if detailed_data and detailed_data.get('success'):
        about_section = detailed_data.get('about_section')
        if about_section and len(about_section) > 30:
            logger.info(f"βœ… Using About section for experience summary")
            summary = about_section.strip()
            # 397 chars + '...' keeps the total at exactly 400.
            if len(summary) > 400:
                summary = summary[:397] + '...'
                logger.info(f"πŸ“ Truncated summary to 400 characters")
            return summary
        else:
            logger.info(f"⚠️ About section not available or too short, using fallback logic")
    # Fallback to previous logic
    logger.info(f"πŸ” Extracting relevant sentences from snippet...")
    summary_parts = []
    if snippet:
        sentences = snippet.split('.')
        relevant_sentences = []
        for sentence in sentences:
            sentence = sentence.strip()
            # Keep sentences long enough to be informative that mention a
            # role or accomplishment keyword.
            if len(sentence) > 20 and any(keyword in sentence.lower() for keyword in
                ['engineer', 'developer', 'manager', 'lead', 'senior', 'experience', 'worked', 'responsible', 'developed', 'built', 'created']):
                relevant_sentences.append(sentence)
        if relevant_sentences:
            # Only the first two relevant sentences are used.
            summary_parts.extend(relevant_sentences[:2])
            logger.info(f"βœ… Found {len(relevant_sentences[:2])} relevant sentences from snippet")
        else:
            logger.info(f"⚠️ No relevant sentences found in snippet")
    if detailed_data and detailed_data.get('success'):
        logger.info(f"πŸ” Adding structured data information...")
        structured_data = detailed_data.get('structured_data', {})
        if 'jobTitle' in structured_data:
            summary_parts.append(f"Current role: {structured_data['jobTitle']}")
            logger.info(f"βœ… Added job title: {structured_data['jobTitle']}")
        if 'worksFor' in structured_data:
            summary_parts.append(f"Company: {structured_data['worksFor']}")
            logger.info(f"βœ… Added company: {structured_data['worksFor']}")
        if 'alumniOf' in structured_data:
            summary_parts.append(f"Education: {structured_data['alumniOf']}")
            logger.info(f"βœ… Added education: {structured_data['alumniOf']}")
        text_content = detailed_data.get('text_content', '')
        # Only mine the raw page text when nothing was collected above.
        if text_content and not summary_parts:
            logger.info(f"πŸ” Searching for experience keywords in text content...")
            experience_keywords = ['experience', 'worked', 'developed', 'built', 'created', 'managed']
            for keyword in experience_keywords:
                if keyword in text_content.lower():
                    sentences = text_content.split('.')
                    for sentence in sentences:
                        # First sufficiently long sentence per keyword wins.
                        if keyword in sentence.lower() and len(sentence.strip()) > 30:
                            summary_parts.append(sentence.strip())
                            logger.info(f"βœ… Found experience sentence with keyword '{keyword}'")
                            break
                    if summary_parts:
                        break
    if summary_parts:
        summary = '. '.join(summary_parts)
        if len(summary) > 400:
            summary = summary[:397] + '...'
            logger.info(f"πŸ“ Truncated summary to 400 characters")
        logger.info(f"βœ… Created summary from {len(summary_parts)} parts")
        return summary
    if snippet:
        # Nothing structured found: fall back to a 20-word snippet prefix.
        logger.info(f"πŸ” Using snippet as fallback...")
        words = snippet.split()
        if len(words) > 20:
            summary = ' '.join(words[:20]) + '...'
            logger.info(f"βœ… Created fallback summary from first 20 words")
            return summary
        logger.info(f"βœ… Using full snippet as summary")
        return snippet
    logger.warning(f"⚠️ No experience information available")
    return "Experience information not available"
def _extract_education_from_linkedin_profile(self, soup: BeautifulSoup) -> Optional[str]:
    """Extract education information from LinkedIn profile using BeautifulSoup.

    Tries four strategies in order: (1) the page's education section by
    element id; (2) the top-card education button/text; (3) 'alumniOf' in
    structured data; (4) a keyword sweep over all text nodes.

    NOTE(review): bs4's `text=` keyword is the legacy alias of `string=`;
    behavior is kept as-is here but `string=` is the modern spelling.

    Args:
        soup: Parsed LinkedIn profile page.

    Returns:
        A school name or education text, or None when nothing is found
        (errors are logged and swallowed).
    """
    try:
        # Method 1: Look for education section by ID
        education_section = soup.find('div', {'id': 'education'})
        if education_section:
            # Find the parent section that contains education information
            # (class name matches LinkedIn's profile-card markup).
            education_card = education_section.find_parent('section', class_='pv-profile-card')
            if education_card:
                # Look for university names in the education card
                university_elements = education_card.find_all(text=re.compile(r'.*University.*|.*College.*|.*School.*|.*Institute.*'))
                for element in university_elements:
                    # hasattr guard: find_all(text=...) yields NavigableString
                    # nodes, but be defensive about other node types.
                    if hasattr(element, 'strip') and element.strip() and len(element.strip()) > 3:
                        return element.strip()
        # Method 2: Look for education in the top card section
        top_card = soup.find('section', class_=re.compile(r'artdeco-card.*'))
        if top_card:
            # Look for education button/link
            education_button = top_card.find('button', attrs={'aria-label': re.compile(r'Education:.*')})
            if education_button and hasattr(education_button, 'get'):
                # Extract university name from aria-label
                aria_label = education_button.get('aria-label', '')
                if aria_label and isinstance(aria_label, str):
                    education_match = re.search(r'Education:\s*([^,]+)', aria_label)
                    if education_match:
                        return education_match.group(1).strip()
            # Look for education text in the top card
            education_text = top_card.find(text=re.compile(r'.*University.*|.*College.*|.*School.*|.*Institute.*'))
            if education_text and hasattr(education_text, 'strip') and education_text.strip():
                return education_text.strip()
        # Method 3: Look for education in structured data
        # (delegates to a sibling helper defined elsewhere in this class).
        structured_data = self._extract_structured_data(soup)
        if 'alumniOf' in structured_data:
            return structured_data['alumniOf']
        # Method 4: Look for education keywords in the entire page
        education_keywords = ['university', 'college', 'school', 'institute', 'bachelor', 'master', 'phd']
        for keyword in education_keywords:
            elements = soup.find_all(text=re.compile(keyword, re.IGNORECASE))
            for element in elements:
                if hasattr(element, 'strip'):
                    text = element.strip()
                    # Length bounds filter out nav noise and whole paragraphs.
                    if len(text) > 10 and len(text) < 200:
                        # Check if this looks like an education entry
                        if any(edu_keyword in text.lower() for edu_keyword in ['university', 'college', 'school', 'institute']):
                            # Extract just the university name
                            university_match = re.search(r'([A-Z][a-zA-Z\s&\.]+?(?:University|College|School|Institute))', text)
                            if university_match:
                                return university_match.group(1).strip()
                            return text
    except Exception as e:
        logger.warning(f"Error extracting education from LinkedIn profile: {str(e)}")
    return None
# Cache management methods
def get_cache_stats(self) -> Dict[str, Any]:
    """Return cache statistics (delegates to the underlying CacheService)."""
    return self.cache_service.get_cache_stats()
def clear_cache(self, cache_type: str = "all") -> None:
    """Clear the specified cache, or every cache when cache_type is "all".

    Delegates to the underlying CacheService; valid cache_type values are
    whatever CacheService.clear_cache accepts.
    """
    self.cache_service.clear_cache(cache_type)
def cleanup_expired_cache(self) -> None:
    """Remove expired cache entries (delegates to the underlying CacheService)."""
    self.cache_service.cleanup_expired_entries()