Spaces:
Sleeping
Sleeping
| { | |
| "domain_id": "data_science", | |
| "display_name": "Data Science & Analytics", | |
| "description": "Machine Learning, Data Analysis, AI Research, and Business Intelligence", | |
| "core_skills": [ | |
| "python", | |
| "r", | |
| "sql", | |
| "pandas", | |
| "numpy", | |
| "scikit_learn", | |
| "tensorflow", | |
| "pytorch", | |
| "keras", | |
| "xgboost", | |
| "tableau", | |
| "power_bi", | |
| "matplotlib", | |
| "seaborn", | |
| "statistics", | |
| "ab_testing", | |
| "feature_engineering", | |
| "spark", | |
| "hadoop", | |
| "airflow", | |
| "dbt" | |
| ], | |
| "aspect_prototypes": { | |
| "technical_skills": [ | |
| "built machine learning models using scikit-learn and XGBoost", | |
| "developed deep learning pipelines with PyTorch", | |
| "created ETL jobs using PySpark for big data processing", | |
| "trained neural networks for image classification", | |
| "implemented NLP models using transformers and BERT", | |
| "designed feature engineering pipelines for ML", | |
| "built recommendation systems using collaborative filtering", | |
| "deployed ML models to production with MLflow", | |
| "created interactive dashboards in Tableau", | |
| "performed A/B testing with statistical significance analysis" | |
| ], | |
| "problem_solving": [ | |
| "improved model accuracy from 78% to 92% through feature engineering", | |
| "reduced model training time by 60% using distributed computing", | |
| "diagnosed and fixed data leakage in ML pipeline", | |
| "optimized hyperparameters using Bayesian optimization", | |
| "handled class imbalance with SMOTE and weighted sampling", | |
| "debugged data quality issues affecting model performance", | |
| "designed experiment to measure causal impact of recommendation", | |
| "created automated anomaly detection system", | |
| "resolved data drift issues in production models", | |
| "built interpretable models for regulatory compliance" | |
| ], | |
| "leadership": [ | |
| "led data science team of 4 on personalization project", | |
| "presented ML insights to C-level stakeholders", | |
| "coordinated with engineering for model deployment", | |
| "organized data science reading group in company", | |
| "mentored junior analysts on SQL and Python", | |
| "drove adoption of MLOps best practices", | |
| "led cross-functional project with marketing team", | |
| "managed data labeling team for annotation project", | |
| "conducted training sessions on Pandas for analysts", | |
| "championed experiment-driven decision making culture" | |
| ], | |
| "internship_experience": [ | |
| "data science intern at Flipkart building recommendation models", | |
| "ML research intern at Google Brain working on NLP", | |
| "analytics intern at McKinsey for retail optimization", | |
| "AI intern at NVIDIA on computer vision projects", | |
| "research intern at IISc on deep learning", | |
| "data analyst intern at Zomato for demand forecasting", | |
| "business intelligence intern at Amazon building dashboards", | |
| "ML platform intern at Meta for model serving", | |
| "quantitative research intern at Goldman Sachs", | |
| "applied scientist intern at AWS on personalization" | |
| ] | |
| }, | |
| "industry_benchmarks": { | |
| "min_employability_score": 0.65, | |
| "expected_cgpa": 8.0, | |
| "expected_internship_months": 4, | |
| "critical_skills": [ | |
| "python", | |
| "sql", | |
| "statistics", | |
| "ml_fundamentals" | |
| ], | |
| "nice_to_have_skills": [ | |
| "deep_learning", | |
| "spark", | |
| "mlops", | |
| "cloud" | |
| ] | |
| }, | |
| "skill_gaps_mapping": { | |
| "deep_learning": { | |
| "demand_score": 0.80, | |
| "courses": [ | |
| "Deep Learning Specialization", | |
| "Fast.ai", | |
| "Stanford CS231n" | |
| ], | |
| "certifications": [ | |
| "TensorFlow Developer", | |
| "PyTorch Certified" | |
| ] | |
| }, | |
| "mlops": { | |
| "demand_score": 0.75, | |
| "courses": [ | |
| "MLOps Specialization", | |
| "ML Engineering for Production" | |
| ], | |
| "certifications": [ | |
| "AWS ML Specialty", | |
| "GCP ML Engineer" | |
| ] | |
| }, | |
| "statistics": { | |
| "demand_score": 0.70, | |
| "courses": [ | |
| "Statistics for Data Science", | |
| "A/B Testing Masterclass" | |
| ], | |
| "certifications": [] | |
| }, | |
| "big_data": { | |
| "demand_score": 0.65, | |
| "courses": [ | |
| "Spark for Data Engineering", | |
| "Databricks Academy" | |
| ], | |
| "certifications": [ | |
| "Databricks Certified", | |
| "Cloudera CCA" | |
| ] | |
| } | |
| }, | |
| "detection_keywords": [ | |
| "data science", | |
| "machine learning", | |
| "deep learning", | |
| "ai", | |
| "analytics", | |
| "data analyst", | |
| "ml engineer", | |
| "research scientist", | |
| "business intelligence", | |
| "statistical modeling", | |
| "predictive analytics", | |
| "data mining" | |
| ] | |
| } |