FCT / domains /data_science.json
Parthnuwal7
Adding analytical content
3d015cd
{
"domain_id": "data_science",
"display_name": "Data Science & Analytics",
"description": "Machine Learning, Data Analysis, AI Research, and Business Intelligence",
"core_skills": [
"python",
"r",
"sql",
"pandas",
"numpy",
"scikit_learn",
"tensorflow",
"pytorch",
"keras",
"xgboost",
"tableau",
"power_bi",
"matplotlib",
"seaborn",
"statistics",
"ab_testing",
"feature_engineering",
"spark",
"hadoop",
"airflow",
"dbt"
],
"aspect_prototypes": {
"technical_skills": [
"built machine learning models using scikit-learn and XGBoost",
"developed deep learning pipelines with PyTorch",
"created ETL jobs using PySpark for big data processing",
"trained neural networks for image classification",
"implemented NLP models using transformers and BERT",
"designed feature engineering pipelines for ML",
"built recommendation systems using collaborative filtering",
"deployed ML models to production with MLflow",
"created interactive dashboards in Tableau",
"performed A/B testing with statistical significance analysis"
],
"problem_solving": [
"improved model accuracy from 78% to 92% through feature engineering",
"reduced model training time by 60% using distributed computing",
"diagnosed and fixed data leakage in ML pipeline",
"optimized hyperparameters using Bayesian optimization",
"handled class imbalance with SMOTE and weighted sampling",
"debugged data quality issues affecting model performance",
"designed experiment to measure causal impact of recommendation",
"created automated anomaly detection system",
"resolved data drift issues in production models",
"built interpretable models for regulatory compliance"
],
"leadership": [
"led data science team of 4 on personalization project",
"presented ML insights to C-level stakeholders",
"coordinated with engineering for model deployment",
"organized data science reading group in company",
"mentored junior analysts on SQL and Python",
"drove adoption of MLOps best practices",
"led cross-functional project with marketing team",
"managed data labeling team for annotation project",
"conducted training sessions on Pandas for analysts",
"championed experiment-driven decision making culture"
],
"internship_experience": [
"data science intern at Flipkart building recommendation models",
"ML research intern at Google Brain working on NLP",
"analytics intern at McKinsey for retail optimization",
"AI intern at NVIDIA on computer vision projects",
"research intern at IISc on deep learning",
"data analyst intern at Zomato for demand forecasting",
"business intelligence intern at Amazon building dashboards",
"ML platform intern at Meta for model serving",
"quantitative research intern at Goldman Sachs",
"applied scientist intern at AWS on personalization"
]
},
"industry_benchmarks": {
"min_employability_score": 0.65,
"expected_cgpa": 8.0,
"expected_internship_months": 4,
"critical_skills": [
"python",
"sql",
"statistics",
"ml_fundamentals"
],
"nice_to_have_skills": [
"deep_learning",
"spark",
"mlops",
"cloud"
]
},
"skill_gaps_mapping": {
"deep_learning": {
"demand_score": 0.80,
"courses": [
"Deep Learning Specialization",
"Fast.ai",
"Stanford CS231n"
],
"certifications": [
"TensorFlow Developer",
"PyTorch Certified"
]
},
"mlops": {
"demand_score": 0.75,
"courses": [
"MLOps Specialization",
"ML Engineering for Production"
],
"certifications": [
"AWS ML Specialty",
"GCP ML Engineer"
]
},
"statistics": {
"demand_score": 0.70,
"courses": [
"Statistics for Data Science",
"A/B Testing Masterclass"
],
"certifications": []
},
"big_data": {
"demand_score": 0.65,
"courses": [
"Spark for Data Engineering",
"Databricks Academy"
],
"certifications": [
"Databricks Certified",
"Cloudera CCA"
]
}
},
"detection_keywords": [
"data science",
"machine learning",
"deep learning",
"ai",
"analytics",
"data analyst",
"ml engineer",
"research scientist",
"business intelligence",
"statistical modeling",
"predictive analytics",
"data mining"
]
}