# File size: 15,016 Bytes
# f15d7db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
"""
Mock data for HRHUB demo.
This file contains hardcoded data for MVP demonstration.

TO SWITCH TO REAL DATA:
Replace imports in app.py:
    from data.mock_data import get_candidate_data, get_company_matches
    ↓
    from data.data_loader import get_candidate_data, get_company_matches
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Any


def get_candidate_data(candidate_id: int = 0) -> Dict[str, Any]:
    """
    Get candidate data by ID.

    This is mock data: every call returns the same hardcoded demo profile.
    Only the 'id' and 'name' fields reflect the requested ``candidate_id``,
    so callers that label things by ID stay consistent with the record they
    receive. A fresh dictionary is built on every call.

    Args:
        candidate_id: Candidate identifier (0 for demo)

    Returns:
        Dictionary with candidate information
    """

    # Mock candidate data (based on your actual structure)
    candidate = {
        # Echo the requested ID instead of always reporting candidate 0
        'id': candidate_id,
        'name': f'Demo Candidate #{candidate_id}',

        # Skills & Expertise
        'skills': [
            'Python', 'Machine Learning', 'Data Science', 'SQL', 'TensorFlow',
            'Pandas', 'NumPy', 'Scikit-learn', 'Deep Learning', 'NLP',
            'Computer Vision', 'AWS', 'Docker', 'Git', 'Agile'
        ],

        # Education (parallel lists: one entry per degree)
        'educational_institution_name': ['Technical University of Denmark'],
        'degree_names': ['Master of Science'],
        'passing_years': ['2023'],
        'educational_results': ['3.8'],
        'result_types': ['GPA'],
        'major_field_of_studies': ['Business Data Science'],

        # Work Experience (parallel lists: one entry per position, newest first)
        'professional_company_names': ['TechCorp', 'DataHub', 'AI Solutions'],
        'company_urls': ['techcorp.com', 'datahub.io', 'aisolutions.ai'],
        'start_dates': ['Jan 2021', 'Jun 2019', 'Jan 2018'],
        'end_dates': ['Current', 'Dec 2020', 'May 2019'],
        'positions': ['Data Scientist', 'ML Engineer', 'Data Analyst'],
        'locations': ['Copenhagen, Denmark', 'Aalborg, Denmark', 'Aarhus, Denmark'],
        'responsibilities': """
            • Developed ML models for customer segmentation
            • Built NLP pipeline for sentiment analysis
            • Deployed models to production using AWS
            • Collaborated with cross-functional teams
            • Mentored junior data scientists
        """,

        # Additional Info
        'languages': ['English', 'Danish', 'Portuguese'],
        'proficiency_levels': ['Fluent', 'Native', 'Native'],
        'certification_providers': ['AWS', 'Google Cloud', 'Coursera'],
        'certification_skills': ['AWS ML Specialty', 'GCP Data Engineer', 'Deep Learning'],

        # Career Goals
        'career_objective': 'Seeking senior data science role focusing on NLP and LLM applications',
        'job_position_name': 'Senior Data Scientist / ML Engineer',

        # Match score (for demo purposes)
        'matched_score': 0.85,

        # Text representation (what gets embedded)
        'text': """
            Skills: Python, Machine Learning, Data Science, SQL, TensorFlow, Pandas, NumPy, 
            Scikit-learn, Deep Learning, NLP, Computer Vision, AWS, Docker, Git, Agile.
            
            Education: Master of Science in Business Data Science from Technical University of Denmark (2023).
            
            Experience: Data Scientist at TechCorp (Current), ML Engineer at DataHub, Data Analyst at AI Solutions.
            Specialized in ML model development, NLP, and production deployment.
            
            Languages: English (Fluent), Danish (Native), Portuguese (Native).
            
            Certifications: AWS ML Specialty, GCP Data Engineer, Deep Learning.
        """
    }

    return candidate


def get_company_matches(candidate_id: int = 0, top_k: int = 10) -> List[Tuple[int, float, Dict[str, Any]]]:
    """
    Get top company matches for a candidate.

    This is mock data: the same hardcoded companies are returned for every
    ``candidate_id``. Results are ordered by descending similarity score.

    Args:
        candidate_id: Candidate identifier (not used by the mock)
        top_k: Number of top matches to return. Values of zero or below
            yield an empty list; values above the number of available
            companies return all of them.

    Returns:
        List of tuples: (company_id, similarity_score, company_data)
    """

    # Mock company matches
    companies = [
        {
            'id': 29286,
            'name': 'Anblicks',
            'similarity_score': 0.7028,
            'description': 'Leading data analytics and AI consulting firm specializing in cloud-native solutions',
            'industries_list': 'Information Technology, Data Analytics, Cloud Computing',
            'specialties_list': 'Big Data | Machine Learning | Cloud Architecture | Data Engineering',
            'employee_count': '500-1000',
            'city': 'San Francisco',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'Python | Machine Learning | AWS | TensorFlow | Data Science | SQL | Spark',
            'posted_job_titles': 'Senior Data Scientist | ML Engineer | Data Architect',
            'experience_levels': 'Mid-Senior level | Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Technology company seeking ML experts with Python, AWS, and production experience...'
        },
        {
            'id': 15234,
            'name': 'iO Associates - US',
            'similarity_score': 0.7026,
            'description': 'Global talent solutions provider connecting tech professionals with innovative companies',
            'industries_list': 'Staffing and Recruiting, Technology',
            'specialties_list': 'Data Science Recruitment | AI/ML Placement | Tech Consulting',
            'employee_count': '1000-5000',
            'city': 'New York',
            'state': 'NY',
            'country': 'USA',
            'required_skills': 'Python | Data Science | Machine Learning | Deep Learning | NLP',
            'posted_job_titles': 'Data Scientist | AI Engineer | Research Scientist',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Contract',
            'text': 'Recruiting firm specializing in data science and AI talent placement...'
        },
        {
            'id': 8721,
            'name': 'DATAECONOMY',
            'similarity_score': 0.6849,
            'description': 'Data platform company building next-gen analytics solutions',
            'industries_list': 'Computer Software, Big Data',
            'specialties_list': 'Data Analytics | Business Intelligence | ETL | Data Warehousing',
            'employee_count': '200-500',
            'city': 'Boston',
            'state': 'MA',
            'country': 'USA',
            'required_skills': 'SQL | Python | Data Modeling | ETL | Tableau | AWS',
            'posted_job_titles': 'Data Engineer | Analytics Engineer | BI Developer',
            'experience_levels': 'Mid level | Mid-Senior level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Building data infrastructure and analytics platforms...'
        },
        {
            'id': 12983,
            'name': 'Datavail',
            'similarity_score': 0.6827,
            'description': 'Database and data management services company',
            'industries_list': 'Information Technology, Database Management',
            'specialties_list': 'Database Administration | Cloud Migration | Performance Tuning',
            'employee_count': '500-1000',
            'city': 'Denver',
            'state': 'CO',
            'country': 'USA',
            'required_skills': 'SQL | Database Design | Python | Cloud Platforms | Performance Optimization',
            'posted_job_titles': 'Database Engineer | Data Platform Engineer | Cloud DBA',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Specialized in database management and cloud data solutions...'
        },
        {
            'id': 45672,
            'name': 'BitPusher',
            'similarity_score': 0.6776,
            'description': 'Software development and IT consulting firm',
            'industries_list': 'Computer Software, IT Services',
            'specialties_list': 'Custom Software Development | Cloud Solutions | DevOps',
            'employee_count': '50-200',
            'city': 'Austin',
            'state': 'TX',
            'country': 'USA',
            'required_skills': 'Python | JavaScript | AWS | Docker | Kubernetes | CI/CD',
            'posted_job_titles': 'Software Engineer | DevOps Engineer | Full Stack Developer',
            'experience_levels': 'Entry level | Mid level',
            'work_types': 'Full-time',
            'text': 'Building custom software solutions for enterprise clients...'
        },
        {
            'id': 33421,
            'name': 'Neural Dynamics',
            'similarity_score': 0.6654,
            'description': 'AI research lab focused on neural networks and deep learning',
            'industries_list': 'Research, Artificial Intelligence',
            'specialties_list': 'Deep Learning | Computer Vision | NLP | Reinforcement Learning',
            'employee_count': '100-200',
            'city': 'Seattle',
            'state': 'WA',
            'country': 'USA',
            'required_skills': 'PyTorch | TensorFlow | Deep Learning | Computer Vision | Research',
            'posted_job_titles': 'Research Scientist | ML Researcher | AI Engineer',
            'experience_levels': 'Senior level | Lead',
            'work_types': 'Full-time | Onsite',
            'text': 'Cutting-edge AI research in neural networks and applications...'
        },
        {
            'id': 28945,
            'name': 'CloudScale Analytics',
            'similarity_score': 0.6543,
            'description': 'Cloud-native data analytics platform',
            'industries_list': 'Cloud Computing, Analytics',
            'specialties_list': 'Cloud Analytics | Real-time Processing | Data Pipelines',
            'employee_count': '200-500',
            'city': 'San Jose',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'AWS | Python | Spark | Kafka | Data Engineering | Distributed Systems',
            'posted_job_titles': 'Data Engineer | Platform Engineer | Solutions Architect',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Building scalable data analytics infrastructure in the cloud...'
        },
        {
            'id': 19283,
            'name': 'DataForge Labs',
            'similarity_score': 0.6421,
            'description': 'ML operations and MLOps platform provider',
            'industries_list': 'Machine Learning, DevOps',
            'specialties_list': 'MLOps | Model Deployment | ML Infrastructure | Monitoring',
            'employee_count': '50-100',
            'city': 'Palo Alto',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'Python | Docker | Kubernetes | ML Deployment | Monitoring Tools',
            'posted_job_titles': 'MLOps Engineer | Platform Engineer | DevOps Engineer',
            'experience_levels': 'Mid level | Mid-Senior level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Helping companies deploy and manage ML models at scale...'
        },
        {
            'id': 51234,
            'name': 'InsightAI',
            'similarity_score': 0.6312,
            'description': 'Business intelligence and predictive analytics company',
            'industries_list': 'Business Intelligence, Predictive Analytics',
            'specialties_list': 'Forecasting | Predictive Modeling | BI Tools | Dashboards',
            'employee_count': '100-200',
            'city': 'Chicago',
            'state': 'IL',
            'country': 'USA',
            'required_skills': 'Python | R | Tableau | PowerBI | Statistical Modeling | SQL',
            'posted_job_titles': 'Data Analyst | BI Developer | Analytics Engineer',
            'experience_levels': 'Mid level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Providing predictive analytics and BI solutions for enterprises...'
        },
        {
            'id': 67821,
            'name': 'QuantumLeap Technologies',
            'similarity_score': 0.6198,
            'description': 'Quantum computing and advanced algorithms research',
            'industries_list': 'Quantum Computing, Research',
            'specialties_list': 'Quantum Algorithms | High-Performance Computing | Cryptography',
            'employee_count': '50-100',
            'city': 'Cambridge',
            'state': 'MA',
            'country': 'USA',
            'required_skills': 'Python | Quantum Computing | Linear Algebra | Algorithms | Research',
            'posted_job_titles': 'Quantum Research Scientist | Algorithm Engineer | Research Engineer',
            'experience_levels': 'Senior level | PhD level',
            'work_types': 'Full-time | Onsite',
            'text': 'Pioneering quantum computing applications and algorithms...'
        }
    ]

    # A negative top_k would turn the slice below into "drop the last N
    # items" (e.g. -1 would return 9 companies), so clamp it to zero.
    limit = max(top_k, 0)

    # Rank explicitly so "top" never depends on the order of the literal
    # above. sorted() is stable, so equal scores keep their listed order.
    ranked = sorted(
        companies,
        key=lambda comp: comp['similarity_score'],
        reverse=True,
    )

    # Return as list of tuples
    matches = [
        (comp['id'], comp['similarity_score'], comp)
        for comp in ranked[:limit]
    ]

    return matches


def get_network_graph_data(candidate_id: int = 0, top_k: int = 10) -> Dict[str, Any]:
    """
    Build the node and edge lists for the candidate/company network graph.

    The graph is a star: one node for the candidate, one node per matched
    company, and one edge from the candidate to each company.

    Args:
        candidate_id: Candidate identifier
        top_k: Number of companies to include

    Returns:
        Dictionary with two keys: 'nodes' (candidate node first, then the
        company nodes in match order) and 'edges' (one per company)
    """

    profile = get_candidate_data(candidate_id)
    ranked = get_company_matches(candidate_id, top_k)

    # Node IDs are prefixed ('C' for the candidate, 'J' for companies)
    hub_id = f'C{candidate_id}'

    hub_node = {
        'id': hub_id,
        'label': f"Candidate #{candidate_id}",
        'title': profile['name'],
        'color': '#00FF00',  # Green
        'shape': 'dot',
        'size': 25
    }

    company_nodes = [
        {
            'id': f'J{company_id}',
            'label': info['name'][:20],  # Keep at most 20 characters
            'title': f"{info['name']}\nScore: {similarity:.4f}",
            'color': '#FF0000',  # Red
            'shape': 'square',
            'size': 15 + (similarity * 20)  # Higher score -> larger node
        }
        for company_id, similarity, info in ranked
    ]

    # One edge per match; the score drives both the edge weight and its
    # opacity (presumably rendered as line thickness / transparency).
    links = [
        {
            'from': hub_id,
            'to': f'J{company_id}',
            'value': similarity,
            'title': f'Match Score: {similarity:.4f}',
            'color': {'opacity': similarity}
        }
        for company_id, similarity, _ in ranked
    ]

    return {
        'nodes': [hub_node, *company_nodes],
        'edges': links
    }


# For testing
if __name__ == "__main__":
    # Smoke test: call each public helper once and print a short summary.
    candidate = get_candidate_data(0)
    print(f"✅ Candidate: {candidate['name']}")

    matches = get_company_matches(0, 5)
    # Report how many matches actually came back instead of a hard-coded "5"
    print(f"✅ Top {len(matches)} matches loaded")

    graph_data = get_network_graph_data(0, 5)
    print(f"✅ Graph data: {len(graph_data['nodes'])} nodes, {len(graph_data['edges'])} edges")