File size: 15,016 Bytes
f15d7db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 |
"""
Mock data for the HRHUB demo.

This file contains hardcoded data for the MVP demonstration.

TO SWITCH TO REAL DATA:
Replace the import in app.py:
    from data.mock_data import get_candidate_data, get_company_matches
with:
    from data.data_loader import get_candidate_data, get_company_matches
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Any
def get_candidate_data(candidate_id: int = 0) -> Dict[str, Any]:
    """
    Return the hardcoded demo candidate profile.

    Args:
        candidate_id: Candidate identifier. This mock never reads it: the
            same profile (with 'id' 0) is returned for every value.

    Returns:
        A freshly built dictionary with the candidate's skills, education,
        work experience, languages, certifications, career goals, a demo
        match score and a free-text summary.
    """
    # NOTE(review): the line breaks of the two multi-line strings below are
    # reproduced from the reviewed copy of this file; any leading whitespace
    # inside the original literals could not be confirmed -- verify.
    responsibilities = (
        "\n"
        "• Developed ML models for customer segmentation\n"
        "• Built NLP pipeline for sentiment analysis\n"
        "• Deployed models to production using AWS\n"
        "• Collaborated with cross-functional teams\n"
        "• Mentored junior data scientists\n"
    )

    # Text representation (what gets embedded)
    embedded_text = (
        "\n"
        "Skills: Python, Machine Learning, Data Science, SQL, TensorFlow, Pandas, NumPy,\n"
        "Scikit-learn, Deep Learning, NLP, Computer Vision, AWS, Docker, Git, Agile.\n"
        "Education: Master of Science in Business Data Science from Technical University of Denmark (2023).\n"
        "Experience: Data Scientist at TechCorp (Current), ML Engineer at DataHub, Data Analyst at AI Solutions.\n"
        "Specialized in ML model development, NLP, and production deployment.\n"
        "Languages: English (Fluent), Danish (Native), Portuguese (Native).\n"
        "Certifications: AWS ML Specialty, GCP Data Engineer, Deep Learning.\n"
    )

    identity = {
        'id': 0,
        'name': 'Demo Candidate #0',
    }

    expertise = {
        'skills': [
            'Python', 'Machine Learning', 'Data Science', 'SQL', 'TensorFlow',
            'Pandas', 'NumPy', 'Scikit-learn', 'Deep Learning', 'NLP',
            'Computer Vision', 'AWS', 'Docker', 'Git', 'Agile',
        ],
    }

    education = {
        'educational_institution_name': ['Technical University of Denmark'],
        'degree_names': ['Master of Science'],
        'passing_years': ['2023'],
        'educational_results': ['3.8'],
        'result_types': ['GPA'],
        'major_field_of_studies': ['Business Data Science'],
    }

    # The per-job lists are parallel: index i of each describes the same job.
    experience = {
        'professional_company_names': ['TechCorp', 'DataHub', 'AI Solutions'],
        'company_urls': ['techcorp.com', 'datahub.io', 'aisolutions.ai'],
        'start_dates': ['Jan 2021', 'Jun 2019', 'Jan 2018'],
        'end_dates': ['Current', 'Dec 2020', 'May 2019'],
        'positions': ['Data Scientist', 'ML Engineer', 'Data Analyst'],
        'locations': ['Copenhagen, Denmark', 'Aalborg, Denmark', 'Aarhus, Denmark'],
        'responsibilities': responsibilities,
    }

    extras = {
        'languages': ['English', 'Danish', 'Portuguese'],
        'proficiency_levels': ['Fluent', 'Native', 'Native'],
        'certification_providers': ['AWS', 'Google Cloud', 'Coursera'],
        'certification_skills': ['AWS ML Specialty', 'GCP Data Engineer', 'Deep Learning'],
    }

    goals = {
        'career_objective': 'Seeking senior data science role focusing on NLP and LLM applications',
        'job_position_name': 'Senior Data Scientist / ML Engineer',
    }

    # Merge the sections in their original order so key iteration order is
    # unchanged; the match score is a fixed value for demo purposes.
    return {
        **identity,
        **expertise,
        **education,
        **experience,
        **extras,
        **goals,
        'matched_score': 0.85,
        'text': embedded_text,
    }
def _mock_companies() -> List[Dict[str, Any]]:
    """Return a fresh list of the hardcoded company records, best match first."""
    return [
        {
            'id': 29286,
            'name': 'Anblicks',
            'similarity_score': 0.7028,
            'description': 'Leading data analytics and AI consulting firm specializing in cloud-native solutions',
            'industries_list': 'Information Technology, Data Analytics, Cloud Computing',
            'specialties_list': 'Big Data | Machine Learning | Cloud Architecture | Data Engineering',
            'employee_count': '500-1000',
            'city': 'San Francisco',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'Python | Machine Learning | AWS | TensorFlow | Data Science | SQL | Spark',
            'posted_job_titles': 'Senior Data Scientist | ML Engineer | Data Architect',
            'experience_levels': 'Mid-Senior level | Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Technology company seeking ML experts with Python, AWS, and production experience...'
        },
        {
            'id': 15234,
            'name': 'iO Associates - US',
            'similarity_score': 0.7026,
            'description': 'Global talent solutions provider connecting tech professionals with innovative companies',
            'industries_list': 'Staffing and Recruiting, Technology',
            'specialties_list': 'Data Science Recruitment | AI/ML Placement | Tech Consulting',
            'employee_count': '1000-5000',
            'city': 'New York',
            'state': 'NY',
            'country': 'USA',
            'required_skills': 'Python | Data Science | Machine Learning | Deep Learning | NLP',
            'posted_job_titles': 'Data Scientist | AI Engineer | Research Scientist',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Contract',
            'text': 'Recruiting firm specializing in data science and AI talent placement...'
        },
        {
            'id': 8721,
            'name': 'DATAECONOMY',
            'similarity_score': 0.6849,
            'description': 'Data platform company building next-gen analytics solutions',
            'industries_list': 'Computer Software, Big Data',
            'specialties_list': 'Data Analytics | Business Intelligence | ETL | Data Warehousing',
            'employee_count': '200-500',
            'city': 'Boston',
            'state': 'MA',
            'country': 'USA',
            'required_skills': 'SQL | Python | Data Modeling | ETL | Tableau | AWS',
            'posted_job_titles': 'Data Engineer | Analytics Engineer | BI Developer',
            'experience_levels': 'Mid level | Mid-Senior level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Building data infrastructure and analytics platforms...'
        },
        {
            'id': 12983,
            'name': 'Datavail',
            'similarity_score': 0.6827,
            'description': 'Database and data management services company',
            'industries_list': 'Information Technology, Database Management',
            'specialties_list': 'Database Administration | Cloud Migration | Performance Tuning',
            'employee_count': '500-1000',
            'city': 'Denver',
            'state': 'CO',
            'country': 'USA',
            'required_skills': 'SQL | Database Design | Python | Cloud Platforms | Performance Optimization',
            'posted_job_titles': 'Database Engineer | Data Platform Engineer | Cloud DBA',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Specialized in database management and cloud data solutions...'
        },
        {
            'id': 45672,
            'name': 'BitPusher',
            'similarity_score': 0.6776,
            'description': 'Software development and IT consulting firm',
            'industries_list': 'Computer Software, IT Services',
            'specialties_list': 'Custom Software Development | Cloud Solutions | DevOps',
            'employee_count': '50-200',
            'city': 'Austin',
            'state': 'TX',
            'country': 'USA',
            'required_skills': 'Python | JavaScript | AWS | Docker | Kubernetes | CI/CD',
            'posted_job_titles': 'Software Engineer | DevOps Engineer | Full Stack Developer',
            'experience_levels': 'Entry level | Mid level',
            'work_types': 'Full-time',
            'text': 'Building custom software solutions for enterprise clients...'
        },
        {
            'id': 33421,
            'name': 'Neural Dynamics',
            'similarity_score': 0.6654,
            'description': 'AI research lab focused on neural networks and deep learning',
            'industries_list': 'Research, Artificial Intelligence',
            'specialties_list': 'Deep Learning | Computer Vision | NLP | Reinforcement Learning',
            'employee_count': '100-200',
            'city': 'Seattle',
            'state': 'WA',
            'country': 'USA',
            'required_skills': 'PyTorch | TensorFlow | Deep Learning | Computer Vision | Research',
            'posted_job_titles': 'Research Scientist | ML Researcher | AI Engineer',
            'experience_levels': 'Senior level | Lead',
            'work_types': 'Full-time | Onsite',
            'text': 'Cutting-edge AI research in neural networks and applications...'
        },
        {
            'id': 28945,
            'name': 'CloudScale Analytics',
            'similarity_score': 0.6543,
            'description': 'Cloud-native data analytics platform',
            'industries_list': 'Cloud Computing, Analytics',
            'specialties_list': 'Cloud Analytics | Real-time Processing | Data Pipelines',
            'employee_count': '200-500',
            'city': 'San Jose',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'AWS | Python | Spark | Kafka | Data Engineering | Distributed Systems',
            'posted_job_titles': 'Data Engineer | Platform Engineer | Solutions Architect',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Building scalable data analytics infrastructure in the cloud...'
        },
        {
            'id': 19283,
            'name': 'DataForge Labs',
            'similarity_score': 0.6421,
            'description': 'ML operations and MLOps platform provider',
            'industries_list': 'Machine Learning, DevOps',
            'specialties_list': 'MLOps | Model Deployment | ML Infrastructure | Monitoring',
            'employee_count': '50-100',
            'city': 'Palo Alto',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'Python | Docker | Kubernetes | ML Deployment | Monitoring Tools',
            'posted_job_titles': 'MLOps Engineer | Platform Engineer | DevOps Engineer',
            'experience_levels': 'Mid level | Mid-Senior level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Helping companies deploy and manage ML models at scale...'
        },
        {
            'id': 51234,
            'name': 'InsightAI',
            'similarity_score': 0.6312,
            'description': 'Business intelligence and predictive analytics company',
            'industries_list': 'Business Intelligence, Predictive Analytics',
            'specialties_list': 'Forecasting | Predictive Modeling | BI Tools | Dashboards',
            'employee_count': '100-200',
            'city': 'Chicago',
            'state': 'IL',
            'country': 'USA',
            'required_skills': 'Python | R | Tableau | PowerBI | Statistical Modeling | SQL',
            'posted_job_titles': 'Data Analyst | BI Developer | Analytics Engineer',
            'experience_levels': 'Mid level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Providing predictive analytics and BI solutions for enterprises...'
        },
        {
            'id': 67821,
            'name': 'QuantumLeap Technologies',
            'similarity_score': 0.6198,
            'description': 'Quantum computing and advanced algorithms research',
            'industries_list': 'Quantum Computing, Research',
            'specialties_list': 'Quantum Algorithms | High-Performance Computing | Cryptography',
            'employee_count': '50-100',
            'city': 'Cambridge',
            'state': 'MA',
            'country': 'USA',
            'required_skills': 'Python | Quantum Computing | Linear Algebra | Algorithms | Research',
            'posted_job_titles': 'Quantum Research Scientist | Algorithm Engineer | Research Engineer',
            'experience_levels': 'Senior level | PhD level',
            'work_types': 'Full-time | Onsite',
            'text': 'Pioneering quantum computing applications and algorithms...'
        },
    ]


def get_company_matches(candidate_id: int = 0, top_k: int = 10) -> List[Tuple[int, float, Dict[str, Any]]]:
    """
    Get top company matches for a candidate.

    The mock holds ten hardcoded companies listed in descending
    similarity-score order, so the first ``top_k`` entries are the best
    matches.

    Args:
        candidate_id: Candidate identifier. This mock never reads it: the
            same companies are returned for every candidate.
        top_k: Maximum number of matches to return. Values above ten return
            all ten companies; zero or negative values return an empty list.

    Returns:
        List of tuples: (company_id, similarity_score, company_data), where
        company_data is the full company dictionary.
    """
    # A negative bound in a slice counts from the END of the list (so
    # companies[:-1] would yield nine matches); clamp it so "fewer than one
    # match requested" consistently means "no matches".
    limit = 0 if (top_k is not None and top_k < 0) else top_k

    return [
        (company['id'], company['similarity_score'], company)
        for company in _mock_companies()[:limit]
    ]
def get_network_graph_data(candidate_id: int = 0, top_k: int = 10) -> Dict[str, Any]:
    """
    Build the node and edge lists for the candidate-to-company network graph.

    Args:
        candidate_id: Candidate identifier; used for the candidate node's id
            and label and passed on to the data functions.
        top_k: Number of companies requested from get_company_matches.

    Returns:
        Dictionary with two keys:
            'nodes': the candidate node followed by one node per company.
            'edges': one edge from the candidate to each company.
    """
    profile = get_candidate_data(candidate_id)
    ranked = get_company_matches(candidate_id, top_k)

    # Every edge starts at the candidate, so build its node id once.
    candidate_node_id = f'C{candidate_id}'

    candidate_node = {
        'id': candidate_node_id,
        'label': f"Candidate #{candidate_id}",
        'title': profile['name'],
        'color': '#00FF00',  # Green
        'shape': 'dot',
        'size': 25,
    }

    company_nodes = [
        {
            'id': f'J{company_id}',
            'label': company['name'][:20],  # Truncate long names
            'title': f"{company['name']}\nScore: {similarity:.4f}",
            'color': '#FF0000',  # Red
            'shape': 'square',
            'size': 15 + (similarity * 20),  # Size based on score
        }
        for company_id, similarity, company in ranked
    ]

    links = [
        {
            'from': candidate_node_id,
            'to': f'J{company_id}',
            'value': similarity,  # Line thickness
            'title': f'Match Score: {similarity:.4f}',
            'color': {'opacity': similarity},  # Transparency based on score
        }
        for company_id, similarity, _company in ranked
    ]

    return {
        'nodes': [candidate_node, *company_nodes],
        'edges': links,
    }
# Manual smoke test: running this module directly exercises each helper once.
if __name__ == "__main__":
    candidate = get_candidate_data(0)
    print(f"✅ Candidate: {candidate['name']}")

    # The result is not inspected; the call only confirms loading succeeds.
    matches = get_company_matches(0, 5)
    print("✅ Top 5 matches loaded")

    graph_data = get_network_graph_data(0, 5)
    node_count = len(graph_data['nodes'])
    edge_count = len(graph_data['edges'])
    print(f"✅ Graph data: {node_count} nodes, {edge_count} edges")
|