---
# Role curation — Round 4 (2026-04-19)
#
# Driven by REAL Pakistani job-board evidence (Glassdoor PK / Indeed PK / LinkedIn PK postings).
# Previous rounds R1-R3 included roadmap/curriculum-level fundamentals (DSA, OOP, Agile, etc.)
# that — while real interview prep concerns — are NOT what real job postings list as requirements.
# R4 removes them to match what employers actually ask for.
#
# Target: 10-18 skills per role. Mandatory tier capped at ~10.
# Total: 147 kept entries across 10 roles.
#
# RULE: A skill in this list IS kept. Anything in onet_roles_raw.yaml NOT listed here is dropped
# (with reason logged to onet_roles_curation_log.md). Each entry can override is_mandatory /
# required_level / weight. Omit to keep raw values.
#
# Layout (per role, keyed by role name under `curation`):
#   keep         - list of retained skills; each item is { name, [is_mandatory], [required_level], [weight] }
#   drop_reasons - mapping of dropped skill name -> reason it was dropped
#
# Sources cited in research/06-dataset-sourcing.md §10 (R4 sources subsection).
curation:
  "Data Scientist":
    keep:
      - { name: "Python" }
      - { name: "SQL" }
      - { name: "Pandas" }
      - { name: "NumPy" }
      - { name: "Scikit-learn" }
      - { name: "PyTorch" }
      - { name: "Statistical Hypothesis Testing" }
      - { name: "Git" }
      - { name: "TensorFlow", is_mandatory: false, weight: 0.9 }
      - { name: "AWS", is_mandatory: false, weight: 0.8 }
      - { name: "Tableau", is_mandatory: false, weight: 0.8 }
      - { name: "Apache Spark", is_mandatory: false, weight: 0.7 }
      - { name: "Feature Engineering" }
      - { name: "Microsoft Power BI", is_mandatory: false, weight: 0.6 }
      - { name: "Matplotlib" }
      - { name: "Docker", is_mandatory: false, weight: 0.6 }  # MLOps overlap
    drop_reasons:
      "C++": "Low-level numerics only; not in PK DS postings"
      "SAS": "Enterprise legacy"
      "MATLAB": "Academic only"
      "Java": "Rare in DS workflow"
      "Microsoft Azure": "Pick one cloud — AWS retained as primary"
      "Apache Hadoop": "Spark replaced it"
      "Microsoft PowerPoint": "Presentation tool, not measurable"
      "R": "R4: PK DS market is Python+SQL; R rarely required (Glassdoor PK Data Scientist evidence)"
      "Microsoft Excel": "R4: not core for DS (Excel is for DA/BI roles)"
      "Snowflake": "R4: warehouse skill — Data Engineer territory"
      "Jupyter Notebook": "R4: assumed knowledge, not learnable on slider"
      "Seaborn": "R4: covered by Matplotlib retention"
      "Data Structures & Algorithms": "R4: interview prep, not posting requirement (per job-board evidence)"

  "Data Analyst":
    keep:
      - { name: "SQL" }
      - { name: "Microsoft Excel", required_level: "ADVANCED" }  # R3: collapsed Excel + Excel Advanced
      - { name: "Microsoft Power BI" }
      - { name: "Python", is_mandatory: true, weight: 0.9 }
      - { name: "Tableau", is_mandatory: false, weight: 0.8 }
      - { name: "Pandas", is_mandatory: false, weight: 0.7 }
      - { name: "DAX", is_mandatory: false, weight: 0.7 }  # for Power BI users
      - { name: "Power Query / M Language", is_mandatory: false, weight: 0.6 }
      - { name: "Data Visualization" }
      - { name: "Statistical Hypothesis Testing", is_mandatory: false, weight: 0.5 }
    drop_reasons:
      "IBM SPSS Statistics": "Enterprise legacy"
      "Microsoft Access": "Legacy desktop DB"
      "Microsoft Office": "Too generic"
      "Microsoft PowerPoint": "Presentation tool"
      "SAS": "Enterprise legacy; PK market is Python/SQL"
      "AWS": "Junior DA in PK rarely owns cloud infra"
      "Microsoft Excel Advanced": "R3: collapsed into Microsoft Excel with required_level=ADVANCED"
      "R": "R4: PK DA postings rarely list R; demoted then dropped"

  "Machine Learning Engineer":
    keep:
      - { name: "Python" }
      - { name: "PyTorch" }
      - { name: "Scikit-learn" }
      - { name: "SQL" }
      - { name: "Docker" }
      - { name: "AWS" }
      - { name: "Pandas" }
      - { name: "NumPy" }
      - { name: "Git" }
      - { name: "GitHub" }
      - { name: "TensorFlow", is_mandatory: false, weight: 0.9 }
      - { name: "MLflow" }
      - { name: "Kubernetes", is_mandatory: false, weight: 0.7 }
      - { name: "FastAPI" }
      - { name: "Apache Spark", is_mandatory: false, weight: 0.7 }
      - { name: "Linux", is_mandatory: false, weight: 0.6 }
    drop_reasons:
      "C++": "Niche optimization only"
      "R": "Statistics-heavy — DS territory"
      "SAS": "Enterprise legacy"
      "MATLAB": "Academic only"
      "Microsoft Excel": "Not core to ML eng"
      "Microsoft PowerPoint": "Presentation tool"
      "Microsoft Power BI": "BI tool"
      "Tableau": "BI tool"
      "Snowflake": "Data Engineer territory"
      "Apache Hadoop": "Spark replaced it"
      "Java": "Rare in ML pipelines"
      "Microsoft Azure": "Pick one cloud — AWS retained"
      "Ansible": "DevOps territory"
      "C": "Legacy systems language"
      "Go": "Rare in ML pipelines"
      "Terraform": "DevOps territory"
      "JavaScript": "Frontend, not ML"
      "Jenkins": "Replaced by GitHub Actions; not ML-eng-specific"
      "NoSQL": "Generic"
      "Scala": "Spark+Scala niche, declining"
      "Splunk Enterprise": "Observability niche"
      "ONNX": "R4: niche optimization, not in PK ML postings"
      "Bash": "R4: assumed background skill"
      "Object-Oriented Programming": "R4: not in postings (interview prep)"
      "Data Structures & Algorithms": "R4: not in postings (interview prep)"
      "Agile / Scrum": "R4: rarely in ML eng postings"

  "Backend Developer":
    keep:
      - { name: "Python" }
      - { name: "Django" }
      - { name: "Node.js" }
      - { name: "PostgreSQL" }
      - { name: "SQL" }
      - { name: "REST API Design" }
      - { name: "Docker" }
      - { name: "Git" }
      - { name: "GitHub" }
      - { name: "JWT / OAuth", is_mandatory: true, weight: 0.8 }
      - { name: "FastAPI", is_mandatory: false, weight: 0.8 }
      - { name: "TypeScript", is_mandatory: false, weight: 0.7 }
      - { name: "AWS", is_mandatory: false, weight: 0.7 }
      - { name: "NoSQL", is_mandatory: false, weight: 0.6 }  # MongoDB-equivalent
      - { name: "Laravel" }
      - { name: "PHP" }
    drop_reasons:
      "C": "Not Python/Node-stack backend"
      "C#": "Microsoft .NET stack — separate market"
      "C++": "Game/systems, not web backend"
      "CSS": "Frontend"
      "HTML": "Frontend"
      "XML": "Legacy"
      "JSON": "Assumed knowledge"
      ".NET": "Microsoft enterprise stack — separate market"
      "Java": "Spring stack — separate market"
      "Spring Boot": "Java stack"
      "Spring Framework": "Java stack"
      "Angular": "Frontend"
      "React": "Frontend"
      "Apache Kafka": "Data Engineer territory"
      "Terraform": "DevOps territory"
      "Microsoft Azure": "Pick one cloud — AWS retained"
      "Jenkins": "DevOps territory"
      "Apache Spark": "Data Eng territory"
      "Jira": "PM tool"
      "Flask": "R4: declining; FastAPI/Django dominate PK postings"
      "Redis": "R4: rarely listed by name in PK postings (assumed within Postgres/MongoDB choice)"
      "Linux": "R4: assumed background"
      "Kubernetes": "R4: senior-only in PK; Docker covers entry-level"
      "pytest": "R4: rarely listed by name in postings"
      "Celery": "R4: rarely listed by name"
      "nginx": "R4: rarely listed by name"
      "Pydantic": "R4: rarely listed by name (implicit with FastAPI)"
      "Object-Oriented Programming": "R4: assumed for interviews, not posting requirement"
      "Data Structures & Algorithms": "R4: interview prep, not posting requirement"
      "Agile / Scrum": "R4: not in posting requirement lists"
      "CI/CD Pipelines": "R4: not as concept skill in postings"
      "GitHub Actions": "R4: rare standalone in PK BE postings"
      ".NET / C#": "R4: separate Pakistani market, deserves its own role tier"
      "Unit Testing": "R4: not in posting requirement lists"

  "Frontend Developer":
    keep:
      - { name: "React" }
      - { name: "JavaScript" }
      - { name: "TypeScript" }
      - { name: "HTML" }
      - { name: "CSS" }
      - { name: "REST API Design" }
      - { name: "Git" }
      - { name: "GitHub" }
      - { name: "Next.js" }
      - { name: "Tailwind CSS", is_mandatory: false, weight: 0.8 }
      - { name: "Redux" }  # R4: explicitly cited in PK MERN postings
      - { name: "Node.js", is_mandatory: false, weight: 0.7 }
      - { name: "Vite", is_mandatory: false, weight: 0.5 }
      - { name: "TanStack Query", is_mandatory: false, weight: 0.4 }
    drop_reasons:
      "C#": "Backend"
      "C++": "Not FE"
      "Java": "Backend"
      "PHP": "Backend (separate Full Stack stack)"
      "Python": "Backend"
      "Go": "Not FE"
      "SQL": "Backend/data"
      "NoSQL": "Backend/data"
      "MongoDB": "Backend/data"
      "MySQL": "Backend/data"
      "PostgreSQL": "Backend/data"
      "Apache Kafka": "Data Eng"
      "Kubernetes": "DevOps territory"
      "Microsoft Azure": "DevOps territory"
      "AWS": "DevOps territory"
      "Spring Boot": "Java backend"
      "Spring Framework": "Java backend"
      "Jenkins": "DevOps"
      "jQuery": "Legacy"
      "WordPress": "CMS — separate small-agency market"
      "JSON": "Assumed"
      "Jira": "PM tool"
      "Vue.js": "R4: separate role market in PK; React dominates"
      "Angular": "R4: separate role market in PK"
      "Vitest": "R4: testing rarely in PK FE postings"
      "Playwright": "R4: testing rarely in PK FE postings"
      "Docker": "R4: rarely listed for FE-only roles"
      "Web Accessibility (a11y)": "R4: rarely a posted requirement in PK"
      "Responsive Design": "R4: assumed within React+Tailwind work"
      "GraphQL": "R4: rare in PK FE postings"
      "Data Structures & Algorithms": "R4: interview prep, not posting requirement"
      "Agile / Scrum": "R4: not in FE posting requirement lists"

  "Full Stack Developer":
    keep:
      - { name: "JavaScript" }
      - { name: "TypeScript" }
      - { name: "React" }
      - { name: "Node.js" }
      - { name: "Python" }
      - { name: "PostgreSQL" }
      - { name: "REST API Design" }
      - { name: "SQL" }
      - { name: "Docker" }
      - { name: "Git" }
      - { name: "GitHub" }
      - { name: "HTML", is_mandatory: true, weight: 0.9 }
      - { name: "CSS", is_mandatory: true, weight: 0.9 }
      - { name: "Next.js" }
      - { name: "Django", is_mandatory: false, weight: 0.8 }
      - { name: "MongoDB", is_mandatory: false, weight: 0.8 }
      - { name: "Tailwind CSS", is_mandatory: false, weight: 0.7 }
      - { name: "AWS", is_mandatory: false, weight: 0.7 }
    drop_reasons:
      "C": "Not web full-stack"
      "C#": ".NET niche — separate stack"
      "C++": "Not web full-stack"
      "XML": "Legacy"
      "JSON": "Assumed knowledge"
      ".NET": ".NET niche — separate stack"
      "Java": "Spring niche — separate stack"
      "Spring Boot": "Java stack"
      "Spring Framework": "Java stack"
      "Apache Kafka": "Data Eng"
      "Terraform": "DevOps"
      "Microsoft Azure": "Pick one cloud — AWS retained"
      "MySQL": "PostgreSQL covers SQL needs"
      "Angular": "Pick one FE framework — React retained"
      "Vue.js": "Pick one FE framework — React retained"
      "Prisma": "R3: Python client archived 2025-04, community-only"
      "Go": "Niche for full-stack web in PK"
      "PHP": "R4: separate Laravel/PHP stack market — Backend covers"
      "Laravel": "R4: separate Laravel/PHP stack market — Backend covers"
      "Jenkins": "DevOps"
      "jQuery": "Legacy"
      "WordPress": "CMS — separate market"
      "Jira": "PM tool"
      "GraphQL": "R4: rare in PK FS postings"
      "Linux": "R4: assumed background"
      "Vite": "R4: assumed within Next.js/React tooling"
      "Vitest": "R4: testing rarely in PK postings"
      "JWT / OAuth": "R4: covered by Backend role; FS focuses on stack"
      "NoSQL": "Generic — MongoDB covers it"
      "Docker Compose": "Reviewer R2: redundant with Docker"
      "Object-Oriented Programming": "R4: not in posting requirement lists"
      "Data Structures & Algorithms": "R4: interview prep, not posting requirement"
      "Agile / Scrum": "R4: not in posting requirement lists"
      "CI/CD Pipelines": "R4: not as concept skill in PK FS postings"
      "GitHub Actions": "R4: rare standalone for FS"

  "Data Engineer":
    keep:
      - { name: "Python" }
      - { name: "SQL" }
      - { name: "Apache Spark" }
      - { name: "Apache Airflow" }
      - { name: "ETL / ELT" }
      - { name: "AWS" }
      - { name: "Git" }
      - { name: "Snowflake", is_mandatory: false, weight: 0.8 }
      - { name: "dbt", is_mandatory: false, weight: 0.8 }
      - { name: "Apache Kafka" }
      - { name: "Google BigQuery" }
      - { name: "Docker", is_mandatory: false, weight: 0.7 }
      - { name: "Microsoft Azure", is_mandatory: false, weight: 0.6 }
      - { name: "Data Modeling", is_mandatory: false, weight: 0.6 }
    drop_reasons:
      "Microsoft Access": "Legacy desktop DB"
      "Microsoft Office": "Too generic"
      "Microsoft Outlook": "Email client"
      "Microsoft PowerPoint": "Presentation tool"
      "Microsoft Power BI": "BI Analyst territory"
      "Tableau": "BI Analyst territory"
      "R": "BI/stats territory; Python+SQL covers DE"
      "Data Modeling (Kimball)": "R3: renamed to plain 'Data Modeling'"
      "Apache Iceberg": "R4: rare in PK entry-level DE postings; Delta Lake (also dropped) was the safer pick but neither needed"
      "Delta Lake": "R4: rare in PK entry-level DE postings"
      "Databricks": "R4: senior-only in PK postings"
      "Parquet": "R4: rarely listed by name in postings (implicit with Spark/Airflow)"
      "Java": "R4: declining for DE; Python+Scala dominate"
      "Microsoft Excel": "R4: rare for DE postings"
      "Data Structures & Algorithms": "R4: interview prep, not posting requirement"

  "DevOps Engineer":
    keep:
      - { name: "Docker" }
      - { name: "Kubernetes" }
      - { name: "Terraform" }
      - { name: "AWS" }
      - { name: "Linux" }
      - { name: "Bash Scripting" }
      - { name: "CI/CD Pipelines" }
      - { name: "Git" }
      - { name: "GitHub" }
      - { name: "Python", is_mandatory: true, weight: 0.8 }
      - { name: "GitHub Actions", is_mandatory: false, weight: 0.8 }
      - { name: "Microsoft Azure", is_mandatory: false, weight: 0.7 }
      - { name: "Ansible", is_mandatory: false, weight: 0.7 }
      - { name: "Prometheus" }
      - { name: "Grafana" }
      - { name: "Jenkins", is_mandatory: false, weight: 0.5 }
    drop_reasons:
      "Bash": "Duplicate of Bash Scripting"
      "Amazon Web Services AWS CloudFormation": "Terraform won"
      "C": "Not DevOps day-to-day"
      "C++": "Not DevOps day-to-day"
      "Java": "Rare in modern DevOps"
      "JavaScript": "Not DevOps"
      "Go": "K8s/Terraform internals only"
      "Microsoft Active Directory": "Windows enterprise niche"
      "Microsoft PowerShell": "Windows admin only"
      "Linux Administration": "R2: duplicate of Linux"
      "Jira": "R2: PM tool consistency"
      "ArgoCD": "R4: senior-only in PK postings"
      "Helm": "R4: senior-only in PK postings"
      "Computer Networking Fundamentals": "R4: assumed background, not in posting requirement lists"
      "Google Cloud Platform": "R4: rare in PK DevOps postings (AWS/Azure dominate)"
      "SQL": "R4: debugging-only, not core skill"

  "AI Engineer (GenAI / LLM)":
    keep:
      - { name: "Python" }
      - { name: "HuggingFace Transformers" }
      - { name: "LangChain" }
      - { name: "OpenAI API" }
      # R4: renamed from "Retrieval-Augmented Generation (RAG)" to canonical job-posting term
      - { name: "RAG" }
      - { name: "Prompt Engineering" }
      - { name: "PyTorch" }
      - { name: "Docker" }
      - { name: "Git" }
      - { name: "GitHub" }
      - { name: "AWS", is_mandatory: false, weight: 0.8 }
      - { name: "Microsoft Azure", is_mandatory: false, weight: 0.7 }
      - { name: "Anthropic Claude API" }
      - { name: "pgvector" }
      - { name: "FastAPI" }
      - { name: "LangGraph", is_mandatory: false, weight: 0.6 }
      - { name: "Sentence Transformers", is_mandatory: false, weight: 0.6 }
    drop_reasons:
      "C": "Legacy"
      "C++": "Niche"
      "Bash": "Background"
      "Go": "Rare in AI eng"
      "Scala": "Big data, not LLM eng"
      "Java": "Rare in AI eng"
      "R": "Statistics, not LLM eng"
      "JavaScript": "Not AI eng"
      "Apache Hadoop": "Big data — Data Eng"
      "Apache Spark": "Big data — Data Eng"
      "Ansible": "DevOps"
      "Terraform": "DevOps"
      "Jenkins": "DevOps"
      "Linux": "Background"
      "NoSQL": "Generic — vector DBs are the relevant DB type"
      "Splunk Enterprise": "Observability niche"
      "TensorFlow": "PyTorch dominates research/LLM"
      "Retrieval-Augmented Generation (RAG)": "R4: renamed to canonical 'RAG'"
      "Fine-tuning (LoRA/PEFT)": "R3: renamed to 'LLM Fine-tuning'; R4: dropped entirely (rare in postings)"
      "LlamaIndex": "R4: overlaps LangChain; rarely both required in PK postings"
      "Pinecone": "R4: open-source vector DBs (pgvector, Chroma) winning"
      "LLM Evaluations": "R4: concept rarely listed in postings"
      "LLM Observability": "R4: concept rarely listed in postings"
      "LLM Fine-tuning": "R4: rare in entry-level PK postings"
      "Kubernetes": "R4: senior-only in PK AI eng postings"
      "SQL": "R4: rare for AI Eng day-to-day"
      "Object-Oriented Programming": "R4: interview prep"
      "Data Structures & Algorithms": "R4: interview prep"

  "Business Intelligence Analyst":
    keep:
      - { name: "Microsoft Power BI" }
      - { name: "DAX" }
      - { name: "SQL" }
      - { name: "Microsoft Excel", required_level: "ADVANCED" }
      - { name: "Power Query / M Language", is_mandatory: true, weight: 0.9 }
      - { name: "Tableau", is_mandatory: false, weight: 0.7 }
      - { name: "Snowflake", is_mandatory: false, weight: 0.7 }
      - { name: "Python", is_mandatory: false, weight: 0.6 }
      - { name: "Star Schema Modeling" }
      - { name: "Data Storytelling" }
    drop_reasons:
      "Microsoft Office": "Too generic — Microsoft Excel covers it"
      "Microsoft PowerPoint": "Presentation, not BI engineering"
      "Oracle Cloud": "Enterprise niche"
      "SAP": "ERP — niche"
      "SAS": "Enterprise legacy"
      "AWS": "BI Analyst consumes from cloud DWH, doesn't own infra"
      "Microsoft Azure": "Same as AWS — BI Analyst doesn't own cloud infra"
      "Microsoft Excel Advanced": "R3: collapsed into Microsoft Excel with required_level=ADVANCED"
      "Looker": "R4: rare in PK BI postings (Power BI/Tableau dominate)"
      "dbt": "R4: more for Data Eng; rarely in PK BI postings"
      "R": "R4: PK BI market is Power BI+SQL+Excel; R rarely required"