Spaces:

Rogersurf
/

hrhub

Running

hrhub / data /mock_data.py

Roger Surf

Refactor: Professional Streamlit MVP

f15d7db 14 days ago

15 kB

	"""
	Mock data for HRHUB demo.
	This file contains hardcoded data for MVP demonstration.

	TO SWITCH TO REAL DATA:
	Replace imports in app.py:
	from data.mock_data import get_candidate_data, get_company_matches
	↓
	from data.data_loader import get_candidate_data, get_company_matches
	"""

	import pandas as pd
	import numpy as np
	from typing import Dict, List, Tuple, Any


	def get_candidate_data(candidate_id: int = 0) -> Dict[str, Any]:
	    """
	    Return the mock candidate profile used by the demo.

	    The profile is hardcoded; only the ``id`` and ``name`` fields follow
	    ``candidate_id`` so that the record agrees with whatever identifier the
	    caller asked for. A new dictionary is built on every call, so callers
	    may mutate the result without affecting later calls.

	    Args:
	        candidate_id: Candidate identifier (0 for demo)

	    Returns:
	        Dictionary with candidate information
	    """
	    candidate = {
	        # Identity: mirrors the requested id instead of a fixed 0
	        'id': candidate_id,
	        'name': f'Demo Candidate #{candidate_id}',

	        # Skills & Expertise
	        'skills': [
	            'Python', 'Machine Learning', 'Data Science', 'SQL', 'TensorFlow',
	            'Pandas', 'NumPy', 'Scikit-learn', 'Deep Learning', 'NLP',
	            'Computer Vision', 'AWS', 'Docker', 'Git', 'Agile'
	        ],

	        # Education
	        'educational_institution_name': ['Technical University of Denmark'],
	        'degree_names': ['Master of Science'],
	        'passing_years': ['2023'],
	        'educational_results': ['3.8'],
	        'result_types': ['GPA'],
	        'major_field_of_studies': ['Business Data Science'],

	        # Work Experience (parallel lists, one entry per job)
	        'professional_company_names': ['TechCorp', 'DataHub', 'AI Solutions'],
	        'company_urls': ['techcorp.com', 'datahub.io', 'aisolutions.ai'],
	        'start_dates': ['Jan 2021', 'Jun 2019', 'Jan 2018'],
	        'end_dates': ['Current', 'Dec 2020', 'May 2019'],
	        'positions': ['Data Scientist', 'ML Engineer', 'Data Analyst'],
	        'locations': ['Copenhagen, Denmark', 'Aalborg, Denmark', 'Aarhus, Denmark'],
	        'responsibilities': """
	• Developed ML models for customer segmentation
	• Built NLP pipeline for sentiment analysis
	• Deployed models to production using AWS
	• Collaborated with cross-functional teams
	• Mentored junior data scientists
	""",

	        # Additional Info
	        'languages': ['English', 'Danish', 'Portuguese'],
	        'proficiency_levels': ['Fluent', 'Native', 'Native'],
	        'certification_providers': ['AWS', 'Google Cloud', 'Coursera'],
	        'certification_skills': ['AWS ML Specialty', 'GCP Data Engineer', 'Deep Learning'],

	        # Career Goals
	        'career_objective': 'Seeking senior data science role focusing on NLP and LLM applications',
	        'job_position_name': 'Senior Data Scientist / ML Engineer',

	        # Match score (for demo purposes)
	        'matched_score': 0.85,

	        # Text representation (what gets embedded)
	        'text': """
	Skills: Python, Machine Learning, Data Science, SQL, TensorFlow, Pandas, NumPy,
	Scikit-learn, Deep Learning, NLP, Computer Vision, AWS, Docker, Git, Agile.

	Education: Master of Science in Business Data Science from Technical University of Denmark (2023).

	Experience: Data Scientist at TechCorp (Current), ML Engineer at DataHub, Data Analyst at AI Solutions.
	Specialized in ML model development, NLP, and production deployment.

	Languages: English (Fluent), Danish (Native), Portuguese (Native).

	Certifications: AWS ML Specialty, GCP Data Engineer, Deep Learning.
	"""
	    }

	    return candidate


	def get_company_matches(candidate_id: int = 0, top_k: int = 10) -> List[Tuple[int, float, Dict[str, Any]]]:
	    """
	    Get top company matches for a candidate.

	    The mock returns the same hardcoded list, already ordered from highest
	    to lowest similarity score, for every candidate. ``candidate_id`` is
	    accepted only so the signature matches the real data loader named in
	    the module docstring.

	    Multi-valued company fields (specialties, required skills, job titles,
	    experience levels, work types) are single strings whose items are
	    separated by a plain ' | '.

	    Args:
	        candidate_id: Candidate identifier (unused by the mock)
	        top_k: Number of top matches to return. Values below zero are
	            treated as zero; values above the number of mock companies
	            return all of them.

	    Returns:
	        List of tuples: (company_id, similarity_score, company_data)
	    """

	    # Mock company matches, sorted by descending similarity_score
	    companies = [
	        {
	            'id': 29286,
	            'name': 'Anblicks',
	            'similarity_score': 0.7028,
	            'description': 'Leading data analytics and AI consulting firm specializing in cloud-native solutions',
	            'industries_list': 'Information Technology, Data Analytics, Cloud Computing',
	            'specialties_list': 'Big Data | Machine Learning | Cloud Architecture | Data Engineering',
	            'employee_count': '500-1000',
	            'city': 'San Francisco',
	            'state': 'CA',
	            'country': 'USA',
	            'required_skills': 'Python | Machine Learning | AWS | TensorFlow | Data Science | SQL | Spark',
	            'posted_job_titles': 'Senior Data Scientist | ML Engineer | Data Architect',
	            'experience_levels': 'Mid-Senior level | Senior level',
	            'work_types': 'Full-time | Remote',
	            'text': 'Technology company seeking ML experts with Python, AWS, and production experience...'
	        },
	        {
	            'id': 15234,
	            'name': 'iO Associates - US',
	            'similarity_score': 0.7026,
	            'description': 'Global talent solutions provider connecting tech professionals with innovative companies',
	            'industries_list': 'Staffing and Recruiting, Technology',
	            'specialties_list': 'Data Science Recruitment | AI/ML Placement | Tech Consulting',
	            'employee_count': '1000-5000',
	            'city': 'New York',
	            'state': 'NY',
	            'country': 'USA',
	            'required_skills': 'Python | Data Science | Machine Learning | Deep Learning | NLP',
	            'posted_job_titles': 'Data Scientist | AI Engineer | Research Scientist',
	            'experience_levels': 'Mid-Senior level',
	            'work_types': 'Full-time | Contract',
	            'text': 'Recruiting firm specializing in data science and AI talent placement...'
	        },
	        {
	            'id': 8721,
	            'name': 'DATAECONOMY',
	            'similarity_score': 0.6849,
	            'description': 'Data platform company building next-gen analytics solutions',
	            'industries_list': 'Computer Software, Big Data',
	            'specialties_list': 'Data Analytics | Business Intelligence | ETL | Data Warehousing',
	            'employee_count': '200-500',
	            'city': 'Boston',
	            'state': 'MA',
	            'country': 'USA',
	            'required_skills': 'SQL | Python | Data Modeling | ETL | Tableau | AWS',
	            'posted_job_titles': 'Data Engineer | Analytics Engineer | BI Developer',
	            'experience_levels': 'Mid level | Mid-Senior level',
	            'work_types': 'Full-time | Hybrid',
	            'text': 'Building data infrastructure and analytics platforms...'
	        },
	        {
	            'id': 12983,
	            'name': 'Datavail',
	            'similarity_score': 0.6827,
	            'description': 'Database and data management services company',
	            'industries_list': 'Information Technology, Database Management',
	            'specialties_list': 'Database Administration | Cloud Migration | Performance Tuning',
	            'employee_count': '500-1000',
	            'city': 'Denver',
	            'state': 'CO',
	            'country': 'USA',
	            'required_skills': 'SQL | Database Design | Python | Cloud Platforms | Performance Optimization',
	            'posted_job_titles': 'Database Engineer | Data Platform Engineer | Cloud DBA',
	            'experience_levels': 'Mid-Senior level',
	            'work_types': 'Full-time | Remote',
	            'text': 'Specialized in database management and cloud data solutions...'
	        },
	        {
	            'id': 45672,
	            'name': 'BitPusher',
	            'similarity_score': 0.6776,
	            'description': 'Software development and IT consulting firm',
	            'industries_list': 'Computer Software, IT Services',
	            'specialties_list': 'Custom Software Development | Cloud Solutions | DevOps',
	            'employee_count': '50-200',
	            'city': 'Austin',
	            'state': 'TX',
	            'country': 'USA',
	            'required_skills': 'Python | JavaScript | AWS | Docker | Kubernetes | CI/CD',
	            'posted_job_titles': 'Software Engineer | DevOps Engineer | Full Stack Developer',
	            'experience_levels': 'Entry level | Mid level',
	            'work_types': 'Full-time',
	            'text': 'Building custom software solutions for enterprise clients...'
	        },
	        {
	            'id': 33421,
	            'name': 'Neural Dynamics',
	            'similarity_score': 0.6654,
	            'description': 'AI research lab focused on neural networks and deep learning',
	            'industries_list': 'Research, Artificial Intelligence',
	            'specialties_list': 'Deep Learning | Computer Vision | NLP | Reinforcement Learning',
	            'employee_count': '100-200',
	            'city': 'Seattle',
	            'state': 'WA',
	            'country': 'USA',
	            'required_skills': 'PyTorch | TensorFlow | Deep Learning | Computer Vision | Research',
	            'posted_job_titles': 'Research Scientist | ML Researcher | AI Engineer',
	            'experience_levels': 'Senior level | Lead',
	            'work_types': 'Full-time | Onsite',
	            'text': 'Cutting-edge AI research in neural networks and applications...'
	        },
	        {
	            'id': 28945,
	            'name': 'CloudScale Analytics',
	            'similarity_score': 0.6543,
	            'description': 'Cloud-native data analytics platform',
	            'industries_list': 'Cloud Computing, Analytics',
	            'specialties_list': 'Cloud Analytics | Real-time Processing | Data Pipelines',
	            'employee_count': '200-500',
	            'city': 'San Jose',
	            'state': 'CA',
	            'country': 'USA',
	            'required_skills': 'AWS | Python | Spark | Kafka | Data Engineering | Distributed Systems',
	            'posted_job_titles': 'Data Engineer | Platform Engineer | Solutions Architect',
	            'experience_levels': 'Mid-Senior level',
	            'work_types': 'Full-time | Remote',
	            'text': 'Building scalable data analytics infrastructure in the cloud...'
	        },
	        {
	            'id': 19283,
	            'name': 'DataForge Labs',
	            'similarity_score': 0.6421,
	            'description': 'ML operations and MLOps platform provider',
	            'industries_list': 'Machine Learning, DevOps',
	            'specialties_list': 'MLOps | Model Deployment | ML Infrastructure | Monitoring',
	            'employee_count': '50-100',
	            'city': 'Palo Alto',
	            'state': 'CA',
	            'country': 'USA',
	            'required_skills': 'Python | Docker | Kubernetes | ML Deployment | Monitoring Tools',
	            'posted_job_titles': 'MLOps Engineer | Platform Engineer | DevOps Engineer',
	            'experience_levels': 'Mid level | Mid-Senior level',
	            'work_types': 'Full-time | Hybrid',
	            'text': 'Helping companies deploy and manage ML models at scale...'
	        },
	        {
	            'id': 51234,
	            'name': 'InsightAI',
	            'similarity_score': 0.6312,
	            'description': 'Business intelligence and predictive analytics company',
	            'industries_list': 'Business Intelligence, Predictive Analytics',
	            'specialties_list': 'Forecasting | Predictive Modeling | BI Tools | Dashboards',
	            'employee_count': '100-200',
	            'city': 'Chicago',
	            'state': 'IL',
	            'country': 'USA',
	            'required_skills': 'Python | R | Tableau | PowerBI | Statistical Modeling | SQL',
	            'posted_job_titles': 'Data Analyst | BI Developer | Analytics Engineer',
	            'experience_levels': 'Mid level',
	            'work_types': 'Full-time | Hybrid',
	            'text': 'Providing predictive analytics and BI solutions for enterprises...'
	        },
	        {
	            'id': 67821,
	            'name': 'QuantumLeap Technologies',
	            'similarity_score': 0.6198,
	            'description': 'Quantum computing and advanced algorithms research',
	            'industries_list': 'Quantum Computing, Research',
	            'specialties_list': 'Quantum Algorithms | High-Performance Computing | Cryptography',
	            'employee_count': '50-100',
	            'city': 'Cambridge',
	            'state': 'MA',
	            'country': 'USA',
	            'required_skills': 'Python | Quantum Computing | Linear Algebra | Algorithms | Research',
	            'posted_job_titles': 'Quantum Research Scientist | Algorithm Engineer | Research Engineer',
	            'experience_levels': 'Senior level | PhD level',
	            'work_types': 'Full-time | Onsite',
	            'text': 'Pioneering quantum computing applications and algorithms...'
	        }
	    ]

	    # A negative slice bound would drop entries from the end of the list
	    # instead of returning none, so clamp the requested count at zero.
	    limit = max(top_k, 0)

	    # Return as list of tuples
	    matches = [
	        (comp['id'], comp['similarity_score'], comp)
	        for comp in companies[:limit]
	    ]

	    return matches


	def get_network_graph_data(candidate_id: int = 0, top_k: int = 10) -> Dict[str, Any]:
	    """
	    Build the node and edge lists for the candidate-to-company graph.

	    The candidate is a single green dot; each matched company is a red
	    square whose size grows with its similarity score. One edge links the
	    candidate to every company, carrying the score as its value and opacity.

	    Args:
	        candidate_id: Candidate identifier
	        top_k: Number of companies to include

	    Returns:
	        Dictionary with two keys: 'nodes' (candidate first, then companies
	        in match order) and 'edges' (one per company, in match order)
	    """
	    profile = get_candidate_data(candidate_id)
	    ranked = get_company_matches(candidate_id, top_k)

	    # Shared by the candidate node and the 'from' end of every edge
	    hub_id = f'C{candidate_id}'

	    candidate_node = {
	        'id': hub_id,
	        'label': f"Candidate #{candidate_id}",
	        'title': profile['name'],
	        'color': '#00FF00',  # Green
	        'shape': 'dot',
	        'size': 25
	    }

	    company_nodes = [
	        {
	            'id': f'J{company_id}',
	            'label': company['name'][:20],  # Truncate long names
	            'title': f"{company['name']}\nScore: {similarity:.4f}",
	            'color': '#FF0000',  # Red
	            'shape': 'square',
	            'size': 15 + (similarity * 20)  # Size based on score
	        }
	        for company_id, similarity, company in ranked
	    ]

	    links = [
	        {
	            'from': hub_id,
	            'to': f'J{company_id}',
	            'value': similarity,  # Line thickness
	            'title': f'Match Score: {similarity:.4f}',
	            'color': {'opacity': similarity}  # Transparency based on score
	        }
	        for company_id, similarity, _company in ranked
	    ]

	    return {
	        'nodes': [candidate_node, *company_nodes],
	        'edges': links
	    }


	# For testing
	if __name__ == "__main__":
	    # Smoke test: call each public function once and print a short summary.
	    demo_candidate = get_candidate_data(0)
	    print(f"✅ Candidate: {demo_candidate['name']}")

	    top_matches = get_company_matches(0, 5)
	    print(f"✅ Top 5 matches loaded")

	    demo_graph = get_network_graph_data(0, 5)
	    node_count = len(demo_graph['nodes'])
	    edge_count = len(demo_graph['edges'])
	    print(f"✅ Graph data: {node_count} nodes, {edge_count} edges")