{
    "domain_id": "data_science",
    "display_name": "Data Science & Analytics",
    "description": "Machine Learning, Data Analysis, AI Research, and Business Intelligence",
    "core_skills": [
        "python",
        "r",
        "sql",
        "pandas",
        "numpy",
        "scikit_learn",
        "tensorflow",
        "pytorch",
        "keras",
        "xgboost",
        "tableau",
        "power_bi",
        "matplotlib",
        "seaborn",
        "statistics",
        "ab_testing",
        "feature_engineering",
        "spark",
        "hadoop",
        "airflow",
        "dbt"
    ],
    "aspect_prototypes": {
        "technical_skills": [
            "built machine learning models using scikit-learn and XGBoost",
            "developed deep learning pipelines with PyTorch",
            "created ETL jobs using PySpark for big data processing",
            "trained neural networks for image classification",
            "implemented NLP models using transformers and BERT",
            "designed feature engineering pipelines for ML",
            "built recommendation systems using collaborative filtering",
            "deployed ML models to production with MLflow",
            "created interactive dashboards in Tableau",
            "performed A/B testing with statistical significance analysis"
        ],
        "problem_solving": [
            "improved model accuracy from 78% to 92% through feature engineering",
            "reduced model training time by 60% using distributed computing",
            "diagnosed and fixed data leakage in ML pipeline",
            "optimized hyperparameters using Bayesian optimization",
            "handled class imbalance with SMOTE and weighted sampling",
            "debugged data quality issues affecting model performance",
            "designed experiments to measure causal impact of recommendations",
            "created automated anomaly detection system",
            "resolved data drift issues in production models",
            "built interpretable models for regulatory compliance"
        ],
        "leadership": [
            "led data science team of 4 on personalization project",
            "presented ML insights to C-level stakeholders",
            "coordinated with engineering for model deployment",
            "organized data science reading group within the company",
            "mentored junior analysts on SQL and Python",
            "drove adoption of MLOps best practices",
            "led cross-functional project with marketing team",
            "managed data labeling team for annotation project",
            "conducted training sessions on Pandas for analysts",
            "championed experiment-driven decision making culture"
        ],
        "internship_experience": [
            "data science intern at Flipkart building recommendation models",
            "ML research intern at Google Brain working on NLP",
            "analytics intern at McKinsey for retail optimization",
            "AI intern at NVIDIA on computer vision projects",
            "research intern at IISc on deep learning",
            "data analyst intern at Zomato for demand forecasting",
            "business intelligence intern at Amazon building dashboards",
            "ML platform intern at Meta for model serving",
            "quantitative research intern at Goldman Sachs",
            "applied scientist intern at AWS on personalization"
        ]
    },
    "industry_benchmarks": {
        "min_employability_score": 0.65,
        "expected_cgpa": 8.0,
        "expected_internship_months": 4,
        "critical_skills": [
            "python",
            "sql",
            "statistics",
            "ml_fundamentals"
        ],
        "nice_to_have_skills": [
            "deep_learning",
            "spark",
            "mlops",
            "cloud"
        ]
    },
    "skill_gaps_mapping": {
        "deep_learning": {
            "demand_score": 0.80,
            "courses": [
                "Deep Learning Specialization",
                "Fast.ai",
                "Stanford CS231n"
            ],
            "certifications": [
                "TensorFlow Developer",
                "PyTorch Certified"
            ]
        },
        "mlops": {
            "demand_score": 0.75,
            "courses": [
                "MLOps Specialization",
                "ML Engineering for Production"
            ],
            "certifications": [
                "AWS ML Specialty",
                "GCP ML Engineer"
            ]
        },
        "statistics": {
            "demand_score": 0.70,
            "courses": [
                "Statistics for Data Science",
                "A/B Testing Masterclass"
            ],
            "certifications": []
        },
        "big_data": {
            "demand_score": 0.65,
            "courses": [
                "Spark for Data Engineering",
                "Databricks Academy"
            ],
            "certifications": [
                "Databricks Certified",
                "Cloudera CCA"
            ]
        }
    },
    "detection_keywords": [
        "data science",
        "machine learning",
        "deep learning",
        "ai",
        "analytics",
        "data analyst",
        "ml engineer",
        "research scientist",
        "business intelligence",
        "statistical modeling",
        "predictive analytics",
        "data mining"
    ]
}