Spaces:
Sleeping
Sleeping
msg
Browse files- folder1/.env +2 -0
- folder1/__pycache__/config.cpython-313.pyc +0 -0
- folder1/__pycache__/database.cpython-313.pyc +0 -0
- folder1/__pycache__/logger.cpython-313.pyc +0 -0
- folder1/__pycache__/main.cpython-313.pyc +0 -0
- folder1/app.log +0 -0
- folder1/config.py +8 -0
- folder1/database.py +13 -0
- folder1/logger.py +13 -0
- folder1/logs/app.log +58 -0
- folder1/main.py +45 -0
- folder1/models/schemas.py +11 -0
- folder1/requirements.txt +5 -0
- folder1/routes/__pycache__/analytics.cpython-313.pyc +0 -0
- folder1/routes/analytics.py +102 -0
- folder1/utils/__pycache__/data_processing.cpython-313.pyc +0 -0
- folder1/utils/data_processing.py +30 -0
folder1/.env
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
SUPABASE_URL=https://qxvpaoeakhddzabctekw.supabase.co
|
| 2 |
+
SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InF4dnBhb2Vha2hkZHphYmN0ZWt3Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDEwNjU2MzEsImV4cCI6MjA1NjY0MTYzMX0.I3GsBjFRfuBKw-KxmSJ7R5iKn2cgGegqIls2Bf32UpI
|
folder1/__pycache__/config.cpython-313.pyc
ADDED
|
Binary file (361 Bytes). View file
|
|
|
folder1/__pycache__/database.cpython-313.pyc
ADDED
|
Binary file (828 Bytes). View file
|
|
|
folder1/__pycache__/logger.cpython-313.pyc
ADDED
|
Binary file (680 Bytes). View file
|
|
|
folder1/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (1.96 kB). View file
|
|
|
folder1/app.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
folder1/config.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
# Load environment variables
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
SUPABASE_URL = os.getenv("SUPABASE_URL")
|
| 8 |
+
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
|
folder1/database.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from supabase import create_client, Client
|
| 2 |
+
from config import SUPABASE_URL, SUPABASE_KEY
|
| 3 |
+
|
| 4 |
+
# Initialize Supabase client
|
| 5 |
+
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
| 6 |
+
|
| 7 |
+
def fetch_data(page_size: int = 1000):
    """Fetch every row of the "HR analysis" table from Supabase.

    A single ``select("*")`` only returns as many rows as the API's
    max-rows setting allows, so the table is read page by page with
    ``range()`` until an empty page comes back.

    Args:
        page_size: Number of rows requested per round trip.

    Returns:
        A list with one dict per table row.

    Raises:
        ValueError: If ``page_size`` is not positive, or if the table
            yields no rows at all (same error the single-request version
            raised for an empty response).
    """
    if page_size <= 0:
        raise ValueError("page_size must be a positive integer")

    rows = []
    start = 0
    while True:
        # NOTE(review): no explicit ordering is applied, so page boundaries
        # rely on the server returning rows in a stable order -- consider
        # adding .order(<primary key>) once the key column is confirmed.
        response = (
            supabase.table("HR analysis")
            .select("*")
            .range(start, start + page_size - 1)
            .execute()
        )
        batch = response.data or []
        if not batch:
            break
        rows.extend(batch)
        # Advance by what was actually received so a server-side cap that is
        # smaller than page_size cannot make us skip rows.
        start += len(batch)

    if not rows:
        raise ValueError("Failed to fetch data from Supabase")
    return rows
|
folder1/logger.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
|
| 3 |
+
def setup_logger():
    """Configure root logging for the application.

    Records at INFO level and above are formatted as
    ``<time> - <level> - <message>`` and sent to two handlers: a file
    handler writing to ``app.log`` (relative to the working directory) and
    a stream handler.
    """
    file_handler = logging.FileHandler("app.log")
    stream_handler = logging.StreamHandler()

    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(message)s",
        level=logging.INFO,
        handlers=[file_handler, stream_handler],
    )
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger(__name__)
|
folder1/logs/app.log
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-03-04 20:13:37,909 - INFO - Will watch for changes in these directories: ['D:\\capstone\\dataset\\folder1']
|
| 2 |
+
2025-03-04 20:13:37,915 - INFO - Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
|
| 3 |
+
2025-03-04 20:13:37,921 - INFO - Started reloader process [984] using WatchFiles
|
| 4 |
+
2025-03-04 20:13:40,128 - INFO - 1 change detected
|
| 5 |
+
2025-03-04 20:13:43,883 - INFO - Started server process [21136]
|
| 6 |
+
2025-03-04 20:13:43,893 - INFO - Waiting for application startup.
|
| 7 |
+
2025-03-04 20:13:43,897 - INFO - Application startup complete.
|
| 8 |
+
2025-03-04 20:16:27,062 - INFO - 1 change detected
|
| 9 |
+
2025-03-04 20:16:27,067 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 10 |
+
2025-03-04 20:16:27,183 - INFO - Shutting down
|
| 11 |
+
2025-03-04 20:16:27,291 - INFO - Waiting for application shutdown.
|
| 12 |
+
2025-03-04 20:16:27,295 - INFO - Application shutdown complete.
|
| 13 |
+
2025-03-04 20:16:27,297 - INFO - Finished server process [21136]
|
| 14 |
+
2025-03-04 20:16:28,113 - INFO - 2 changes detected
|
| 15 |
+
2025-03-04 20:16:28,116 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 16 |
+
2025-03-04 20:16:30,880 - INFO - 5 changes detected
|
| 17 |
+
2025-03-04 20:16:41,194 - INFO - 1 change detected
|
| 18 |
+
2025-03-04 20:16:41,237 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 19 |
+
2025-03-04 20:16:43,626 - INFO - 5 changes detected
|
| 20 |
+
2025-03-04 20:16:47,175 - INFO - Started server process [3356]
|
| 21 |
+
2025-03-04 20:16:47,182 - INFO - Waiting for application startup.
|
| 22 |
+
2025-03-04 20:16:47,185 - INFO - Application startup complete.
|
| 23 |
+
2025-03-04 20:17:08,401 - INFO - 1 change detected
|
| 24 |
+
2025-03-04 20:17:08,403 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 25 |
+
2025-03-04 20:17:08,507 - INFO - Shutting down
|
| 26 |
+
2025-03-04 20:17:08,616 - INFO - Waiting for application shutdown.
|
| 27 |
+
2025-03-04 20:17:08,619 - INFO - Application shutdown complete.
|
| 28 |
+
2025-03-04 20:17:08,622 - INFO - Finished server process [3356]
|
| 29 |
+
2025-03-04 20:17:09,347 - INFO - 1 change detected
|
| 30 |
+
2025-03-04 20:17:11,124 - INFO - 5 changes detected
|
| 31 |
+
2025-03-04 20:17:15,863 - INFO - 1 change detected
|
| 32 |
+
2025-03-04 20:17:15,865 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 33 |
+
2025-03-04 20:17:18,191 - INFO - 5 changes detected
|
| 34 |
+
2025-03-04 20:17:23,542 - INFO - 1 change detected
|
| 35 |
+
2025-03-04 20:17:23,543 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 36 |
+
2025-03-04 20:17:25,729 - INFO - 5 changes detected
|
| 37 |
+
2025-03-04 20:17:26,701 - INFO - 1 change detected
|
| 38 |
+
2025-03-04 20:17:26,702 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 39 |
+
2025-03-04 20:17:30,341 - INFO - 1 change detected
|
| 40 |
+
2025-03-04 20:17:30,342 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 41 |
+
2025-03-04 20:17:31,745 - INFO - 1 change detected
|
| 42 |
+
2025-03-04 20:17:31,746 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 43 |
+
2025-03-04 20:17:37,489 - INFO - 6 changes detected
|
| 44 |
+
2025-03-04 20:17:37,490 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 45 |
+
2025-03-04 20:17:39,793 - INFO - 6 changes detected
|
| 46 |
+
2025-03-04 20:17:39,795 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 47 |
+
2025-03-04 20:17:44,073 - INFO - 1 change detected
|
| 48 |
+
2025-03-04 20:17:44,074 - WARNING - WatchFiles detected changes in 'routes\analytics.py'. Reloading...
|
| 49 |
+
2025-03-04 20:17:46,261 - INFO - 5 changes detected
|
| 50 |
+
2025-03-04 20:17:49,746 - INFO - Started server process [8820]
|
| 51 |
+
2025-03-04 20:17:49,754 - INFO - Waiting for application startup.
|
| 52 |
+
2025-03-04 20:17:49,756 - INFO - Application startup complete.
|
| 53 |
+
2025-03-04 20:18:25,834 - INFO - Will watch for changes in these directories: ['D:\\capstone\\dataset\\folder1']
|
| 54 |
+
2025-03-04 20:18:25,837 - INFO - Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
|
| 55 |
+
2025-03-04 20:18:25,840 - INFO - Started reloader process [10832] using WatchFiles
|
| 56 |
+
2025-03-04 20:18:31,454 - INFO - Started server process [9576]
|
| 57 |
+
2025-03-04 20:18:31,461 - INFO - Waiting for application startup.
|
| 58 |
+
2025-03-04 20:18:31,464 - INFO - Application startup complete.
|
folder1/main.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from routes import analytics
|
| 3 |
+
from logger import setup_logger
|
| 4 |
+
from database import supabase # Ensure this is properly initialized
|
| 5 |
+
from fastapi import FastAPI, HTTPException
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
# Initialize FastAPI app
|
| 9 |
+
app = FastAPI(title="HR Analytics API", description="HR Insights and Predictive Analytics")
|
| 10 |
+
|
| 11 |
+
# Setup logger
|
| 12 |
+
setup_logger()
|
| 13 |
+
|
| 14 |
+
# Include routes
|
| 15 |
+
app.include_router(analytics.router)
|
| 16 |
+
|
| 17 |
+
@app.get("/home")
def home():
    """Return a static overview of the API.

    The payload holds a welcome message, a description, fixed employee
    head-count figures, a map of endpoint paths to one-line summaries and
    the locations of the interactive documentation.
    """
    # Head-count figures are constants here; they are not read from the database.
    employee_statistics = {
        "total_employees": 2845,
        "male_employees": 1257,
        "female_employees": 1588,
    }

    endpoints = {
        "/satisfaction-analysis": "Analyze employee satisfaction by department.",
        "/department-performance": "Get average performance scores by department.",
        "/training-analytics": "Analyze training program completion rates.",
        "/engagement-performance-correlation": "Find correlation between engagement and performance scores.",
        "/cost-benefit-analysis": "Perform cost-benefit analysis for training programs.",
        "/training-effectiveness": "Evaluate the effectiveness of training programs.",
        "/diversity-dashboard": "View diversity metrics by department.",
        "/worklife-balance-impact": "Analyze the impact of work-life balance on performance.",
        "/career-development": "Track employee career development over time.",
    }

    documentation = {"Swagger UI": "/docs", "Redoc": "/redoc"}

    overview = {
        "message": "Welcome to the HR Analytics and Predictive Insights API",
        "description": "This API provides HR analytics, including employee satisfaction, performance metrics, and training insights.",
        "employee_statistics": employee_statistics,
        "endpoints": endpoints,
        "documentation": documentation,
    }
    return overview
|
folder1/models/schemas.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
class EmployeeSatisfaction(BaseModel):
    """Schema pairing a department with a satisfaction score."""

    # Department label.
    department: str
    # Satisfaction score for that department.
    satisfaction_score: float
|
| 7 |
+
|
| 8 |
+
class DepartmentPerformance(BaseModel):
    """Schema pairing a department with its performance figures."""

    # Department label.
    department: str
    # Performance score for that department.
    performance_score: float
    # Employee rating; may be omitted, in which case it is None.
    employee_rating: Optional[float] = None
|
folder1/requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
pandas
|
| 4 |
+
python-dotenv
|
| 5 |
+
supabase
|
folder1/routes/__pycache__/analytics.cpython-313.pyc
ADDED
|
Binary file (7.44 kB). View file
|
|
|
folder1/routes/analytics.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from database import fetch_data
|
| 4 |
+
from utils.data_processing import preprocess_data
|
| 5 |
+
from logger import logger
|
| 6 |
+
|
| 7 |
+
# Initialize router
|
| 8 |
+
router = APIRouter()
|
| 9 |
+
|
| 10 |
+
def get_filtered_data(columns):
    """Fetch the HR data, preprocess it and keep only ``columns``.

    Args:
        columns: List of column names the caller needs.

    Returns:
        A DataFrame restricted to ``columns`` with every row that has a
        missing value in any of them dropped.

    Raises:
        HTTPException: 500 if fetching or preprocessing fails; 400 if any
            requested column is absent from the data.
    """
    try:
        raw_data = fetch_data()
        df = preprocess_data(pd.DataFrame(raw_data))
    except Exception as e:
        logger.exception("Error fetching data: %s", e)
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e

    # Validate outside the try block: raising the 400 inside it would be
    # caught by the generic handler above and reported as a 500.
    missing_cols = [col for col in columns if col not in df.columns]
    if missing_cols:
        raise HTTPException(status_code=400, detail=f"Missing columns: {missing_cols}")

    return df[columns].dropna()
|
| 22 |
+
|
| 23 |
+
@router.get("/satisfaction-analysis")
def satisfaction_analysis():
    """Return the mean 'Satisfaction Score' per 'DepartmentType'.

    Returns:
        A list of records, one per department.

    Raises:
        HTTPException: Propagated unchanged from ``get_filtered_data``;
            500 for any other failure during aggregation.
    """
    try:
        data = get_filtered_data(['Satisfaction Score', 'DepartmentType'])
        result = data.groupby("DepartmentType")["Satisfaction Score"].mean().reset_index()
        return result.to_dict(orient="records")
    except HTTPException:
        # Keep the status code and detail chosen upstream instead of
        # re-wrapping them in a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
|
| 31 |
+
|
| 32 |
+
@router.get("/department-performance")
def department_performance():
    """Return mean 'Performance Score' and 'Current Employee Rating' per department.

    Returns:
        A list of records, one per 'DepartmentType'.

    Raises:
        HTTPException: Propagated unchanged from ``get_filtered_data``;
            500 for any other failure during aggregation.
    """
    try:
        data = get_filtered_data(["Performance Score", "Current Employee Rating", "DepartmentType"])
        result = data.groupby("DepartmentType")[["Performance Score", "Current Employee Rating"]].mean().reset_index()
        return result.to_dict(orient="records")
    except HTTPException:
        # Keep the status code and detail chosen upstream instead of
        # re-wrapping them in a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
|
| 40 |
+
|
| 41 |
+
@router.get("/training-analytics")
def training_analytics():
    """Return the share of each 'Training Outcome' within each 'Training Cost' value.

    Rows are grouped by 'Training Cost'; within each group the outcomes are
    counted and normalised to proportions, with absent combinations filled
    with 0.

    Returns:
        A nested dict keyed by outcome, then by training cost.

    Raises:
        HTTPException: Propagated unchanged from ``get_filtered_data``;
            500 for any other failure during aggregation.
    """
    try:
        data = get_filtered_data(['Training Outcome', 'Training Cost'])
        completion_rates = data.groupby("Training Cost")['Training Outcome'].value_counts(normalize=True).unstack(fill_value=0)
        return completion_rates.to_dict()
    except HTTPException:
        # Keep the status code and detail chosen upstream instead of
        # re-wrapping them in a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
|
| 49 |
+
|
| 50 |
+
@router.get("/engagement-performance-correlation")
def engagement_performance_correlation():
    """Return the correlation between 'Engagement Score' and 'Performance Score'.

    Returns:
        ``{"correlation_coefficient": <float or None>}``. The value is
        ``None`` when the correlation is undefined (NaN).

    Raises:
        HTTPException: Propagated unchanged from ``get_filtered_data``;
            500 for any other failure during the computation.
    """
    try:
        data = get_filtered_data(['Engagement Score', 'Performance Score'])
        correlation = data[['Engagement Score', 'Performance Score']].corr().iloc[0, 1]
        # NaN cannot be encoded as JSON, so report an undefined correlation
        # as null instead of failing while the response is serialised.
        coefficient = None if pd.isna(correlation) else float(correlation)
        return {"correlation_coefficient": coefficient}
    except HTTPException:
        # Keep the status code and detail chosen upstream instead of
        # re-wrapping them in a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
|
| 58 |
+
|
| 59 |
+
@router.get("/cost-benefit-analysis")
def cost_benefit_analysis():
    """Return a per-department ratio of performance to training spend.

    For each 'DepartmentType' the ROI is the mean 'Performance Score'
    divided by the summed 'Training Cost'.

    Returns:
        A list of ``{"DepartmentType": ..., "ROI": <float or None>}``
        records. ROI is ``None`` when the ratio is not finite (for example
        when the department's total training cost is 0).

    Raises:
        HTTPException: Propagated unchanged from ``get_filtered_data``;
            500 for any other failure during aggregation.
    """
    import math

    try:
        data = get_filtered_data(['Training Cost', 'Performance Score', 'DepartmentType'])
        grouped = data.groupby("DepartmentType")
        roi = grouped["Performance Score"].mean() / grouped["Training Cost"].sum()
        # inf/NaN cannot be encoded as JSON; surface them as null.
        return [
            {
                "DepartmentType": department,
                "ROI": float(value) if math.isfinite(value) else None,
            }
            for department, value in roi.items()
        ]
    except HTTPException:
        # Keep the status code and detail chosen upstream instead of
        # re-wrapping them in a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
|
| 67 |
+
|
| 68 |
+
@router.get("/training-effectiveness")
def training_effectiveness():
    """Return the mean 'Performance Score' per 'Training Program Name'.

    'Training Outcome' is requested too, so rows lacking it are dropped
    before averaging even though it is not part of the result.

    Returns:
        A list of records, one per training program.

    Raises:
        HTTPException: Propagated unchanged from ``get_filtered_data``;
            500 for any other failure during aggregation.
    """
    try:
        data = get_filtered_data(['Training Outcome', 'Performance Score', 'Training Program Name'])
        result = data.groupby("Training Program Name")["Performance Score"].mean().reset_index()
        return result.to_dict(orient="records")
    except HTTPException:
        # Keep the status code and detail chosen upstream instead of
        # re-wrapping them in a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
|
| 76 |
+
|
| 77 |
+
@router.get("/diversity-dashboard")
def diversity_dashboard():
    """Return the share of each 'GenderCode' within each 'DepartmentType'.

    'RaceDesc' is requested too, so rows lacking it are dropped before
    counting even though it is not part of the result.

    Returns:
        A nested dict keyed by gender code, then by department, holding
        proportions (absent combinations are 0).

    Raises:
        HTTPException: Propagated unchanged from ``get_filtered_data``;
            500 for any other failure during aggregation.
    """
    try:
        data = get_filtered_data(['GenderCode', 'RaceDesc', 'DepartmentType'])
        diversity_metrics = data.groupby("DepartmentType")['GenderCode'].value_counts(normalize=True).unstack(fill_value=0)
        return diversity_metrics.to_dict()
    except HTTPException:
        # Keep the status code and detail chosen upstream instead of
        # re-wrapping them in a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
|
| 85 |
+
|
| 86 |
+
@router.get("/worklife-balance-impact")
def worklife_balance_impact():
    """Return the correlation between 'Work-Life Balance Score' and 'Performance Score'.

    Returns:
        ``{"correlation_coefficient": <float or None>}``. The value is
        ``None`` when the correlation is undefined (NaN).

    Raises:
        HTTPException: Propagated unchanged from ``get_filtered_data``;
            500 for any other failure during the computation.
    """
    try:
        data = get_filtered_data(['Work-Life Balance Score', 'Performance Score'])
        correlation = data[['Work-Life Balance Score', 'Performance Score']].corr().iloc[0, 1]
        # NaN cannot be encoded as JSON, so report an undefined correlation
        # as null instead of failing while the response is serialised.
        coefficient = None if pd.isna(correlation) else float(correlation)
        return {"correlation_coefficient": coefficient}
    except HTTPException:
        # Keep the status code and detail chosen upstream instead of
        # re-wrapping them in a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
|
| 94 |
+
|
| 95 |
+
@router.get("/career-development")
def career_development():
    """Return, per 'Employee ID', the number of rows with a 'StartDate'.

    The count is reported under the key "Career Movements".

    Returns:
        A list of records, one per employee.

    Raises:
        HTTPException: Propagated unchanged from ``get_filtered_data``;
            500 for any other failure during aggregation.
    """
    try:
        data = get_filtered_data(['Employee ID', 'StartDate'])
        career_progress = data.groupby("Employee ID")["StartDate"].count().reset_index(name="Career Movements")
        return career_progress.to_dict(orient="records")
    except HTTPException:
        # Keep the status code and detail chosen upstream instead of
        # re-wrapping them in a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
|
folder1/utils/__pycache__/data_processing.cpython-313.pyc
ADDED
|
Binary file (1.66 kB). View file
|
|
|
folder1/utils/data_processing.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
def preprocess_data(data: pd.DataFrame):
    """Clean and preprocess HR analytics data.

    The input frame is left untouched; all changes are made on a copy.

    Steps, each applied only if the column exists:
      * 'Survey Date', 'StartDate' and 'DOB' are parsed to datetimes
        (unparseable values become NaT).
      * 'Age' is derived from 'DOB' as whole days since birth // 365.
      * 'Performance Score' labels are mapped to numbers
        (Exceeds=5, Fully Meets=4, Needs Improvement=3, PIP=2); values that
        are already numeric are kept, anything else becomes NaN.

    Args:
        data: Raw HR records.

    Returns:
        A new DataFrame with the conversions applied.
    """
    # Copy first so the caller's DataFrame is never modified in place.
    data = data.copy()

    # Convert date columns
    for col in ('Survey Date', 'StartDate', 'DOB'):
        if col in data.columns:
            data[col] = pd.to_datetime(data[col], errors='coerce')

    # Calculate Age from DOB
    if 'DOB' in data.columns:
        data['Age'] = (pd.to_datetime("today") - data['DOB']).dt.days // 365

    # Built once rather than on every value passed to the helper below.
    score_map = {"Exceeds": 5, "Fully Meets": 4, "Needs Improvement": 3, "PIP": 2}

    def clean_performance_score(value):
        """Map one raw score to a number, or None when it is unrecognised."""
        if isinstance(value, (int, float)):
            return value
        if isinstance(value, str):
            return score_map.get(value.strip())
        return None

    # Convert Performance Score to Numeric
    if 'Performance Score' in data.columns:
        cleaned = data['Performance Score'].apply(clean_performance_score)
        data['Performance Score'] = pd.to_numeric(cleaned, errors='coerce')

    return data
|