sivan26 committed on
Commit
b601ed7
·
verified ·
1 Parent(s): c1f339b

Create jobs_dataset.py

Browse files
Files changed (1) hide show
  1. jobs_dataset.py +55 -0
jobs_dataset.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from typing import List, Dict
3
+
4
def generate_job_database(rounds: int = 150) -> List[Dict]:
    """Generate a synthetic database of job postings across several industries.

    Each round emits one posting per category (Technology, Healthcare,
    Finance, Marketing), in that order. A posting is built from a randomly
    chosen template of its category plus a random experience level, salary
    band and location.

    Randomness comes from the module-level ``random`` generator, so calling
    ``random.seed(...)`` beforehand makes the whole result reproducible.

    Args:
        rounds: Number of passes over the categories. The result holds
            ``rounds * 4`` postings; the default of 150 yields 600. A value
            of zero or less yields an empty list.

    Returns:
        A list of job dicts with the keys ``id`` (sequential, starting at 1),
        ``title``, ``description``, ``requirements``, ``experience_level``,
        ``salary_range``, ``category`` and ``location``.
    """
    job_templates = {
        "Technology": [
            {
                "title": "Software Engineer",
                "desc": "Design, develop, and maintain software applications.",
                "skills": ["Python", "Java", "JavaScript", "Git", "Agile", "Problem Solving"],
            },
            {
                "title": "Data Scientist",
                "desc": "Analyze complex data to extract valuable business insights.",
                "skills": ["Python", "R", "Machine Learning", "SQL", "Statistics", "Pandas"],
            },
            {
                "title": "DevOps Engineer",
                "desc": "Manage infrastructure, deployment pipelines, and automation.",
                "skills": ["AWS", "Docker", "Kubernetes", "Linux", "CI/CD", "Terraform"],
            },
            {
                "title": "Frontend Developer",
                "desc": "Create intuitive user interfaces and engaging web experiences.",
                "skills": ["JavaScript", "React", "CSS", "HTML", "TypeScript", "UI/UX Principles"],
            },
            {
                "title": "Backend Developer",
                "desc": "Build robust server-side applications, services, and APIs.",
                "skills": ["Python", "Node.js", "Django", "PostgreSQL", "REST APIs", "MongoDB"],
            },
            {
                "title": "Machine Learning Engineer",
                "desc": "Deploy, monitor, and maintain ML models in production environments.",
                "skills": ["Python", "TensorFlow", "PyTorch", "MLOps", "Docker", "Scikit-learn"],
            },
        ],
        "Healthcare": [
            {
                "title": "Registered Nurse",
                "desc": "Provide compassionate patient care and medical support.",
                "skills": ["Patient Care", "Medical Knowledge", "CPR", "Communication", "Teamwork"],
            },
            {
                "title": "Healthcare Data Analyst",
                "desc": "Analyze clinical data to improve patient outcomes and operational efficiency.",
                "skills": ["SQL", "Python", "Tableau", "Healthcare Regulations", "Statistics"],
            },
            {
                "title": "Medical Assistant",
                "desc": "Support healthcare providers with clinical and administrative tasks.",
                "skills": ["Patient Communication", "Medical Records", "Scheduling", "Clinical Skills"],
            },
        ],
        "Finance": [
            {
                "title": "Financial Analyst",
                "desc": "Analyze financial data, create financial models, and support investment decisions.",
                "skills": ["Financial Modeling", "Excel", "Data Analysis", "Valuation", "Market Research"],
            },
            {
                "title": "Accountant",
                "desc": "Manage financial records, prepare tax documents, and ensure compliance.",
                "skills": ["Accounting", "QuickBooks", "Tax Law", "Financial Reporting", "Auditing"],
            },
            {
                "title": "Fintech Software Engineer",
                "desc": "Develop software for financial services, focusing on security and scalability.",
                "skills": ["Python", "Java", "SQL", "Cybersecurity", "Blockchain"],
            },
        ],
        "Marketing": [
            {
                "title": "Digital Marketing Manager",
                "desc": "Develop and execute comprehensive digital marketing strategies.",
                "skills": ["Digital Marketing", "SEO", "Social Media", "Google Analytics", "Content Strategy"],
            },
            {
                "title": "Content Creator",
                "desc": "Produce engaging and brand-aligned content for various platforms.",
                "skills": ["Content Creation", "SEO", "Social Media", "Writing", "Video Editing"],
            },
            {
                "title": "Marketing Data Analyst",
                "desc": "Analyze marketing campaign performance and customer behavior data.",
                "skills": ["SQL", "Google Analytics", "Data Visualization", "A/B Testing", "Excel"],
            },
        ],
    }
    experience_levels = ["Entry-level", "Mid-level", "Senior", "Lead/Principal"]
    salary_ranges = {
        "Entry-level": ["$45k-$65k", "$50k-$70k"],
        "Mid-level": ["$70k-$95k", "$75k-$100k"],
        "Senior": ["$100k-$130k", "$115k-$145k"],
        "Lead/Principal": ["$140k-$170k", "$150k-$180k"],
    }
    # Loop-invariant, so built once instead of on every iteration.
    locations = ["Remote", "New York, NY", "San Francisco, CA", "Chicago, IL", "Austin, TX"]

    jobs: List[Dict] = []
    for _ in range(rounds):
        for category, templates in job_templates.items():
            # The order of the random draws below (template, level, salary,
            # location) is kept fixed so seeded runs stay reproducible.
            template = random.choice(templates)
            exp_level = random.choice(experience_levels)

            # Entry-level postings use the bare title; other levels are prefixed.
            if exp_level == "Entry-level":
                title = template["title"]
            else:
                title = f"{exp_level} {template['title']}"

            jobs.append({
                # Ids are sequential and 1-based.
                "id": len(jobs) + 1,
                "title": title,
                "description": template["desc"],
                # dict.fromkeys removes duplicates while keeping the template's
                # skill order; list(set(...)) ordered skills by string hash,
                # which varies from one interpreter run to the next.
                "requirements": list(dict.fromkeys(template["skills"])),
                "experience_level": exp_level,
                "salary_range": random.choice(salary_ranges[exp_level]),
                "category": category,
                "location": random.choice(locations),
            })
    return jobs
51
+
52
# --- Module-level job database ---
# generate_job_database() is called a single time here, when this module is
# first imported, and its result is kept in JOBS_DATABASE.
# The main app imports this name instead of calling the generator itself.
JOBS_DATABASE: List[Dict] = generate_job_database()