Spaces:
Runtime error
Runtime error
| """ | |
| LinkedIn Profile Data Extractor | |
| Extracts user profile data and job listings from LinkedIn | |
| """ | |
| import os | |
| import logging | |
| import json | |
| import re | |
| from typing import Dict, Any, List, Optional | |
| from datetime import datetime | |
| import requests | |
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class LinkedInProfileExtractor:
    """Extract and populate user data from LinkedIn.

    Live profile lookups go through the LinkedIn v2 REST API and need an
    OAuth 2.0 access token (see ``set_access_token``). Without a token, or
    when a request fails, ``extract_profile_data`` falls back to an empty
    profile template. Job search always returns mock data.
    """

    # Seconds to wait on each LinkedIn HTTP call. ``requests`` applies no
    # timeout by default, so without this a stalled connection would block
    # the caller indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Read the OAuth client credentials from the environment."""
        self.client_id = os.getenv('LINKEDIN_CLIENT_ID')
        self.client_secret = os.getenv('LINKEDIN_CLIENT_SECRET')
        self.access_token = None

    def set_access_token(self, token: str):
        """Set the OAuth access token used for authenticated API calls."""
        self.access_token = token

    def extract_profile_data(self, profile_url: Optional[str] = None) -> Dict[str, Any]:
        """Fetch the authenticated member's profile from the LinkedIn API.

        Args:
            profile_url: Accepted for interface compatibility; currently
                unused because ``/v2/me`` always describes the token owner.

        Returns:
            The parsed profile when a token is set and the profile request
            succeeds; otherwise the empty structure from
            ``_get_profile_template``.
        """
        if not self.access_token:
            return self._get_profile_template()

        headers = {
            'Authorization': f'Bearer {self.access_token}',
            'X-Restli-Protocol-Version': '2.0.0'
        }
        try:
            profile_response = requests.get(
                'https://api.linkedin.com/v2/me',
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            if profile_response.status_code != 200:
                # Make the fallback visible instead of silently returning
                # an empty template.
                logger.warning(
                    "LinkedIn profile request returned status %s",
                    profile_response.status_code,
                )
                return self._get_profile_template()
            profile = profile_response.json()
            # Only ask for the email once the profile itself is available.
            email_data = self._fetch_email_data(headers)
            return self._parse_linkedin_response(profile, email_data)
        except Exception as e:
            # Best-effort boundary: any failure degrades to the template.
            logger.error("Error fetching LinkedIn profile: %s", e)
        return self._get_profile_template()

    def _fetch_email_data(self, headers: Dict[str, str]) -> Dict[str, Any]:
        """Return the email lookup payload, or ``{}`` if it cannot be fetched.

        The email is optional, so a failure here must not discard a
        profile that was fetched successfully.
        """
        try:
            email_response = requests.get(
                'https://api.linkedin.com/v2/emailAddress?q=members&projection=(elements*(handle~))',
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            if email_response.status_code == 200:
                return email_response.json()
        except Exception as e:
            logger.warning("Error fetching LinkedIn email address: %s", e)
        return {}

    def _parse_linkedin_response(self, profile: Dict, email_data: Dict) -> Dict[str, Any]:
        """Parse LinkedIn API responses into our standard profile format.

        Args:
            profile: Decoded JSON body of the ``/v2/me`` response.
            email_data: Decoded JSON body of the email lookup, or ``{}``.

        Returns:
            A dict with the same top-level and ``contact`` keys as
            ``_get_profile_template``. List sections are left empty because
            they need additional, restricted API permissions.
        """
        first_name = profile.get('localizedFirstName', '')
        last_name = profile.get('localizedLastName', '')
        vanity_name = profile.get('vanityName', '')

        extracted_data = {
            'contact': {
                'name': f"{first_name} {last_name}".strip(),
                'email': '',
                'phone': '',
                # Without a vanity name there is no valid profile URL.
                'linkedin': (
                    f"https://www.linkedin.com/in/{vanity_name}"
                    if vanity_name else ''
                ),
                # This response carries no location string; the headline
                # must not be reused here.
                'location': '',
                'website': ''
            },
            'summary': profile.get('summary', ''),
            'headline': profile.get('localizedHeadline', ''),
            'experience': [],
            'education': [],
            'skills': [],
            'certifications': [],
            'languages': [],
            'projects': [],
            'recommendations': []
        }

        # Use the first element that carries a decorated 'handle~' entry.
        for element in email_data.get('elements') or []:
            if 'handle~' in element:
                extracted_data['contact']['email'] = element['handle~'].get('emailAddress', '')
                break

        return extracted_data

    def _get_profile_template(self) -> Dict[str, Any]:
        """Return a fresh, empty profile structure.

        ``experience`` and ``education`` each contain one blank entry that
        shows the expected per-item fields.
        """
        return {
            'contact': {
                'name': '',
                'email': '',
                'phone': '',
                'linkedin': '',
                'location': '',
                'website': ''
            },
            'summary': '',
            'headline': '',
            'experience': [
                {
                    'title': '',
                    'company': '',
                    'location': '',
                    'start_date': '',
                    'end_date': '',
                    'description': '',
                    'skills_used': []
                }
            ],
            'education': [
                {
                    'degree': '',
                    'field': '',
                    'school': '',
                    'start_date': '',
                    'end_date': '',
                    'description': ''
                }
            ],
            'skills': [],
            'certifications': [],
            'languages': [],
            'projects': [],
            'recommendations': []
        }

    def search_jobs(self, keywords: str, location: str = '') -> List[Dict[str, Any]]:
        """Search for jobs on LinkedIn.

        The LinkedIn job search API requires special partnership access,
        so no request is made, with or without an access token, and mock
        listings are returned for demonstration.

        Args:
            keywords: Search terms, interpolated into the mock listings.
            location: Optional location; blank selects a default per listing.

        Returns:
            A list of mock job dicts.
        """
        return self._get_mock_linkedin_jobs(keywords, location)

    def _get_mock_linkedin_jobs(self, keywords: str, location: str) -> List[Dict[str, Any]]:
        """Build mock LinkedIn job listings for demonstration."""
        # Take the timestamp once so both listings share the same value.
        posted_date = datetime.now().isoformat()
        return [
            {
                'id': 'linkedin_job_1',
                'title': f'Senior {keywords} Engineer',
                'company': 'Tech Innovations Inc.',
                'location': location or 'Remote',
                'description': f'We are looking for a talented {keywords} engineer to join our team...',
                'url': 'https://www.linkedin.com/jobs/view/123456',
                'posted_date': posted_date,
                'salary': '$120,000 - $180,000',
                'job_type': 'Full-time',
                'experience_level': 'Senior',
                'skills_required': [keywords, 'Python', 'AWS', 'Docker'],
                'source': 'LinkedIn'
            },
            {
                'id': 'linkedin_job_2',
                'title': f'{keywords} Developer',
                'company': 'Global Solutions Corp',
                'location': location or 'Hybrid',
                'description': f'Join our growing team as a {keywords} developer...',
                'url': 'https://www.linkedin.com/jobs/view/789012',
                'posted_date': posted_date,
                'salary': '$90,000 - $130,000',
                'job_type': 'Full-time',
                'experience_level': 'Mid-level',
                'skills_required': [keywords, 'JavaScript', 'React', 'Node.js'],
                'source': 'LinkedIn'
            }
        ]

    def auto_populate_from_linkedin(self, linkedin_url: str) -> Dict[str, Any]:
        """Build a demo profile from a LinkedIn profile URL.

        No network access happens here: the name is derived from the URL
        slug and the summary and skills are mock values.

        Args:
            linkedin_url: URL of the form ``.../linkedin.com/in/<slug>``.

        Returns:
            A populated template, or the empty template when the URL has
            no ``/in/<slug>`` part.
        """
        # Stop the slug at '/', '?' or '#' so tracking parameters and
        # fragments do not leak into the derived name.
        username_match = re.search(r'linkedin\.com/in/([^/?#]+)', linkedin_url or '')
        if not username_match:
            logger.error("Invalid LinkedIn URL: %s", linkedin_url)
            return self._get_profile_template()

        username = username_match.group(1)

        # In production this would use the LinkedIn API; for now return
        # the template with slug-derived and mock values filled in.
        template = self._get_profile_template()
        template['contact']['linkedin'] = linkedin_url
        template['contact']['name'] = username.replace('-', ' ').title()
        template['summary'] = f"Experienced professional with expertise in various domains. LinkedIn: {username}"
        template['skills'] = ['Leadership', 'Project Management', 'Strategic Planning', 'Team Building']
        return template
# Singleton instance: one extractor created at import time and exposed at
# module level. Construction only reads the LINKEDIN_CLIENT_ID and
# LINKEDIN_CLIENT_SECRET environment variables and leaves the access token
# unset.
linkedin_extractor = LinkedInProfileExtractor()