Spaces:

Rogersurf
/

hrhub

Running

File size: 15,016 Bytes

f15d7db

"""
Mock data for HRHUB demo.
This file contains hardcoded data for MVP demonstration.

TO SWITCH TO REAL DATA:
Replace imports in app.py:
    from data.mock_data import get_candidate_data, get_company_matches
    ↓
    from data.data_loader import get_candidate_data, get_company_matches
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Any


def get_candidate_data(candidate_id: int = 0) -> Dict[str, Any]:
    """
    Get candidate data by ID.
    
    Args:
        candidate_id: Candidate identifier (0 for demo)
    
    Returns:
        Dictionary with candidate information
    """
    
    # Mock candidate data (based on your actual structure)
    candidate = {
        'id': 0,
        'name': 'Demo Candidate #0',
        
        # Skills & Expertise
        'skills': [
            'Python', 'Machine Learning', 'Data Science', 'SQL', 'TensorFlow',
            'Pandas', 'NumPy', 'Scikit-learn', 'Deep Learning', 'NLP',
            'Computer Vision', 'AWS', 'Docker', 'Git', 'Agile'
        ],
        
        # Education
        'educational_institution_name': ['Technical University of Denmark'],
        'degree_names': ['Master of Science'],
        'passing_years': ['2023'],
        'educational_results': ['3.8'],
        'result_types': ['GPA'],
        'major_field_of_studies': ['Business Data Science'],
        
        # Work Experience
        'professional_company_names': ['TechCorp', 'DataHub', 'AI Solutions'],
        'company_urls': ['techcorp.com', 'datahub.io', 'aisolutions.ai'],
        'start_dates': ['Jan 2021', 'Jun 2019', 'Jan 2018'],
        'end_dates': ['Current', 'Dec 2020', 'May 2019'],
        'positions': ['Data Scientist', 'ML Engineer', 'Data Analyst'],
        'locations': ['Copenhagen, Denmark', 'Aalborg, Denmark', 'Aarhus, Denmark'],
        'responsibilities': """
            • Developed ML models for customer segmentation
            • Built NLP pipeline for sentiment analysis
            • Deployed models to production using AWS
            • Collaborated with cross-functional teams
            • Mentored junior data scientists
        """,
        
        # Additional Info
        'languages': ['English', 'Danish', 'Portuguese'],
        'proficiency_levels': ['Fluent', 'Native', 'Native'],
        'certification_providers': ['AWS', 'Google Cloud', 'Coursera'],
        'certification_skills': ['AWS ML Specialty', 'GCP Data Engineer', 'Deep Learning'],
        
        # Career Goals
        'career_objective': 'Seeking senior data science role focusing on NLP and LLM applications',
        'job_position_name': 'Senior Data Scientist / ML Engineer',
        
        # Match score (for demo purposes)
        'matched_score': 0.85,
        
        # Text representation (what gets embedded)
        'text': """
            Skills: Python, Machine Learning, Data Science, SQL, TensorFlow, Pandas, NumPy, 
            Scikit-learn, Deep Learning, NLP, Computer Vision, AWS, Docker, Git, Agile.
            
            Education: Master of Science in Business Data Science from Technical University of Denmark (2023).
            
            Experience: Data Scientist at TechCorp (Current), ML Engineer at DataHub, Data Analyst at AI Solutions.
            Specialized in ML model development, NLP, and production deployment.
            
            Languages: English (Fluent), Danish (Native), Portuguese (Native).
            
            Certifications: AWS ML Specialty, GCP Data Engineer, Deep Learning.
        """
    }
    
    return candidate


def get_company_matches(candidate_id: int = 0, top_k: int = 10) -> List[Tuple[int, float, Dict[str, Any]]]:
    """
    Get top company matches for a candidate.
    
    Args:
        candidate_id: Candidate identifier
        top_k: Number of top matches to return
    
    Returns:
        List of tuples: (company_id, similarity_score, company_data)
    """
    
    # Mock company matches
    companies = [
        {
            'id': 29286,
            'name': 'Anblicks',
            'similarity_score': 0.7028,
            'description': 'Leading data analytics and AI consulting firm specializing in cloud-native solutions',
            'industries_list': 'Information Technology, Data Analytics, Cloud Computing',
            'specialties_list': 'Big Data | Machine Learning | Cloud Architecture | Data Engineering',
            'employee_count': '500-1000',
            'city': 'San Francisco',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'Python | Machine Learning | AWS | TensorFlow | Data Science | SQL | Spark',
            'posted_job_titles': 'Senior Data Scientist | ML Engineer | Data Architect',
            'experience_levels': 'Mid-Senior level | Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Technology company seeking ML experts with Python, AWS, and production experience...'
        },
        {
            'id': 15234,
            'name': 'iO Associates - US',
            'similarity_score': 0.7026,
            'description': 'Global talent solutions provider connecting tech professionals with innovative companies',
            'industries_list': 'Staffing and Recruiting, Technology',
            'specialties_list': 'Data Science Recruitment | AI/ML Placement | Tech Consulting',
            'employee_count': '1000-5000',
            'city': 'New York',
            'state': 'NY',
            'country': 'USA',
            'required_skills': 'Python | Data Science | Machine Learning | Deep Learning | NLP',
            'posted_job_titles': 'Data Scientist | AI Engineer | Research Scientist',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Contract',
            'text': 'Recruiting firm specializing in data science and AI talent placement...'
        },
        {
            'id': 8721,
            'name': 'DATAECONOMY',
            'similarity_score': 0.6849,
            'description': 'Data platform company building next-gen analytics solutions',
            'industries_list': 'Computer Software, Big Data',
            'specialties_list': 'Data Analytics | Business Intelligence | ETL | Data Warehousing',
            'employee_count': '200-500',
            'city': 'Boston',
            'state': 'MA',
            'country': 'USA',
            'required_skills': 'SQL | Python | Data Modeling | ETL | Tableau | AWS',
            'posted_job_titles': 'Data Engineer | Analytics Engineer | BI Developer',
            'experience_levels': 'Mid level | Mid-Senior level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Building data infrastructure and analytics platforms...'
        },
        {
            'id': 12983,
            'name': 'Datavail',
            'similarity_score': 0.6827,
            'description': 'Database and data management services company',
            'industries_list': 'Information Technology, Database Management',
            'specialties_list': 'Database Administration | Cloud Migration | Performance Tuning',
            'employee_count': '500-1000',
            'city': 'Denver',
            'state': 'CO',
            'country': 'USA',
            'required_skills': 'SQL | Database Design | Python | Cloud Platforms | Performance Optimization',
            'posted_job_titles': 'Database Engineer | Data Platform Engineer | Cloud DBA',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Specialized in database management and cloud data solutions...'
        },
        {
            'id': 45672,
            'name': 'BitPusher',
            'similarity_score': 0.6776,
            'description': 'Software development and IT consulting firm',
            'industries_list': 'Computer Software, IT Services',
            'specialties_list': 'Custom Software Development | Cloud Solutions | DevOps',
            'employee_count': '50-200',
            'city': 'Austin',
            'state': 'TX',
            'country': 'USA',
            'required_skills': 'Python | JavaScript | AWS | Docker | Kubernetes | CI/CD',
            'posted_job_titles': 'Software Engineer | DevOps Engineer | Full Stack Developer',
            'experience_levels': 'Entry level | Mid level',
            'work_types': 'Full-time',
            'text': 'Building custom software solutions for enterprise clients...'
        },
        {
            'id': 33421,
            'name': 'Neural Dynamics',
            'similarity_score': 0.6654,
            'description': 'AI research lab focused on neural networks and deep learning',
            'industries_list': 'Research, Artificial Intelligence',
            'specialties_list': 'Deep Learning | Computer Vision | NLP | Reinforcement Learning',
            'employee_count': '100-200',
            'city': 'Seattle',
            'state': 'WA',
            'country': 'USA',
            'required_skills': 'PyTorch | TensorFlow | Deep Learning | Computer Vision | Research',
            'posted_job_titles': 'Research Scientist | ML Researcher | AI Engineer',
            'experience_levels': 'Senior level | Lead',
            'work_types': 'Full-time | Onsite',
            'text': 'Cutting-edge AI research in neural networks and applications...'
        },
        {
            'id': 28945,
            'name': 'CloudScale Analytics',
            'similarity_score': 0.6543,
            'description': 'Cloud-native data analytics platform',
            'industries_list': 'Cloud Computing, Analytics',
            'specialties_list': 'Cloud Analytics | Real-time Processing | Data Pipelines',
            'employee_count': '200-500',
            'city': 'San Jose',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'AWS | Python | Spark | Kafka | Data Engineering | Distributed Systems',
            'posted_job_titles': 'Data Engineer | Platform Engineer | Solutions Architect',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Building scalable data analytics infrastructure in the cloud...'
        },
        {
            'id': 19283,
            'name': 'DataForge Labs',
            'similarity_score': 0.6421,
            'description': 'ML operations and MLOps platform provider',
            'industries_list': 'Machine Learning, DevOps',
            'specialties_list': 'MLOps | Model Deployment | ML Infrastructure | Monitoring',
            'employee_count': '50-100',
            'city': 'Palo Alto',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'Python | Docker | Kubernetes | ML Deployment | Monitoring Tools',
            'posted_job_titles': 'MLOps Engineer | Platform Engineer | DevOps Engineer',
            'experience_levels': 'Mid level | Mid-Senior level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Helping companies deploy and manage ML models at scale...'
        },
        {
            'id': 51234,
            'name': 'InsightAI',
            'similarity_score': 0.6312,
            'description': 'Business intelligence and predictive analytics company',
            'industries_list': 'Business Intelligence, Predictive Analytics',
            'specialties_list': 'Forecasting | Predictive Modeling | BI Tools | Dashboards',
            'employee_count': '100-200',
            'city': 'Chicago',
            'state': 'IL',
            'country': 'USA',
            'required_skills': 'Python | R | Tableau | PowerBI | Statistical Modeling | SQL',
            'posted_job_titles': 'Data Analyst | BI Developer | Analytics Engineer',
            'experience_levels': 'Mid level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Providing predictive analytics and BI solutions for enterprises...'
        },
        {
            'id': 67821,
            'name': 'QuantumLeap Technologies',
            'similarity_score': 0.6198,
            'description': 'Quantum computing and advanced algorithms research',
            'industries_list': 'Quantum Computing, Research',
            'specialties_list': 'Quantum Algorithms | High-Performance Computing | Cryptography',
            'employee_count': '50-100',
            'city': 'Cambridge',
            'state': 'MA',
            'country': 'USA',
            'required_skills': 'Python | Quantum Computing | Linear Algebra | Algorithms | Research',
            'posted_job_titles': 'Quantum Research Scientist | Algorithm Engineer | Research Engineer',
            'experience_levels': 'Senior level | PhD level',
            'work_types': 'Full-time | Onsite',
            'text': 'Pioneering quantum computing applications and algorithms...'
        }
    ]
    
    # Return as list of tuples
    matches = [
        (comp['id'], comp['similarity_score'], comp)
        for comp in companies[:top_k]
    ]
    
    return matches


def get_network_graph_data(candidate_id: int = 0, top_k: int = 10) -> Dict[str, Any]:
    """
    Generate network graph data for visualization.
    
    Args:
        candidate_id: Candidate identifier
        top_k: Number of companies to include
    
    Returns:
        Dictionary with nodes and edges for network graph
    """
    
    candidate = get_candidate_data(candidate_id)
    matches = get_company_matches(candidate_id, top_k)
    
    # Create nodes
    nodes = []
    
    # Add candidate node
    nodes.append({
        'id': f'C{candidate_id}',
        'label': f"Candidate #{candidate_id}",
        'title': candidate['name'],
        'color': '#00FF00',  # Green
        'shape': 'dot',
        'size': 25
    })
    
    # Add company nodes
    for comp_id, score, comp_data in matches:
        nodes.append({
            'id': f'J{comp_id}',
            'label': comp_data['name'][:20],  # Truncate long names
            'title': f"{comp_data['name']}\nScore: {score:.4f}",
            'color': '#FF0000',  # Red
            'shape': 'square',
            'size': 15 + (score * 20)  # Size based on score
        })
    
    # Create edges (connections)
    edges = []
    
    for comp_id, score, comp_data in matches:
        edges.append({
            'from': f'C{candidate_id}',
            'to': f'J{comp_id}',
            'value': score,  # Line thickness
            'title': f'Match Score: {score:.4f}',
            'color': {'opacity': score}  # Transparency based on score
        })
    
    return {
        'nodes': nodes,
        'edges': edges
    }


# For testing
if __name__ == "__main__":
    # Test functions
    candidate = get_candidate_data(0)
    print(f"✅ Candidate: {candidate['name']}")
    
    matches = get_company_matches(0, 5)
    print(f"✅ Top 5 matches loaded")
    
    graph_data = get_network_graph_data(0, 5)
    print(f"✅ Graph data: {len(graph_data['nodes'])} nodes, {len(graph_data['edges'])} edges")