# pratikshahp's picture
# Create utils.py
# 1897667 verified
# utils.py
import pandas as pd
import glob
import re
import os
def extract_employee_data(
    employee_name,
    directory="hrdataset/employees",
    reference_year=None,
):
    """Extract an employee's details from their Markdown profile file.

    The profile is looked up by globbing ``directory`` for a file named
    ``*_<employee name with spaces replaced by underscores>.md``.

    Args:
        employee_name: Employee name as it appears in the file name, with
            spaces where the file name has underscores.
        directory: Folder that holds the employee Markdown files.
        reference_year: Year that ``experience`` is measured against.
            Defaults to the current calendar year.

    Returns:
        On success, a dict with the keys ``name``, ``role``,
        ``joining_date`` (``YYYY-MM-DD`` string) and ``experience``
        (``reference_year`` minus the joining year). A ``rating`` key (float,
        taken from the most recent ``**YYYY:** <number>`` line) is present
        only when the file contains at least one such line.
        On failure, a string starting with ``"Error:"`` — either no matching
        file was found or a required field is absent from the file.
    """
    from datetime import date  # local import keeps the block self-contained

    # Escape glob metacharacters so a name such as "*" or "A[1]" is matched
    # literally instead of selecting some other employee's file.
    file_stem = glob.escape(employee_name.replace(" ", "_"))
    search_pattern = f"{glob.escape(directory)}/*_{file_stem}.md"

    # glob returns matches in arbitrary order; sort so the pick is stable.
    matching_files = sorted(glob.glob(search_pattern))
    if not matching_files:
        return f"Error: Employee file for {employee_name} not found."

    with open(matching_files[0], "r", encoding="utf-8") as file:
        content = file.read()

    required_fields = {
        "name": r"\*\*Name:\*\* (.+)",
        "role": r"\*\*Role:\*\* (.+)",
        "joining_date": r"\*\*Joining Date:\*\* (\d{4}-\d{2}-\d{2})",
    }
    details = {}
    for key, pattern in required_fields.items():
        match = re.search(pattern, content)
        if match is None:
            # Report the problem in the same style as the not-found case
            # rather than crashing on ``None.group``.
            label = key.replace("_", " ")
            return (
                f"Error: Could not find {label} in employee file "
                f"for {employee_name}."
            )
        details[key] = match.group(1)

    if reference_year is None:
        reference_year = date.today().year
    joining_year = int(details["joining_date"].split("-")[0])
    details["experience"] = reference_year - joining_year

    # Capture a well-formed number only, so a sentence-ending period after
    # the rating ("4.5.") does not end up in the text passed to float().
    ratings = re.findall(
        r"\*\*(\d{4}):\*\* (\d+(?:\.\d+)?|\.\d+)", content
    )
    if ratings:
        latest_rating = sorted(ratings, key=lambda pair: int(pair[0]))[-1]
        details["rating"] = float(latest_rating[1])

    return details
def get_survey_sentiment(
    employee_name,
    survey_file_path="hrdataset/surveys/Employee_Culture_Survey_Responses.csv",
):
    """Summarise an employee's survey responses as one overall sentiment.

    Rows are selected by comparing the ``Employee`` column with
    ``employee_name`` after trimming whitespace and lower-casing both sides.
    The ``Sentiment`` values of those rows are normalised the same way and
    the ``positive`` and ``negative`` labels are counted.

    Args:
        employee_name: Name of the employee to look up.
        survey_file_path: Path of the survey CSV file. It must provide the
            columns ``Employee`` and ``Sentiment``.

    Returns:
        ``"Positive"`` or ``"Negative"`` when that label has the strictly
        higher count, ``"Neutral"`` when the counts are equal, or a string
        starting with ``"Error:"`` when the file is missing, lacks a required
        column, or has no rows for the employee.
    """
    # isfile (not exists) so that a directory at this path is reported as
    # missing data instead of failing inside read_csv.
    if not os.path.isfile(survey_file_path):
        return "Error: Survey data not found."

    try:
        # Read everything as text: the .str accessor used below is rejected
        # for columns that pandas would otherwise infer as numeric.
        survey_df = pd.read_csv(survey_file_path, dtype=str)
    except pd.errors.EmptyDataError:
        # A zero-byte file holds no responses for anyone.
        return "Error: No survey data found for this employee."

    missing_columns = [
        column
        for column in ("Employee", "Sentiment")
        if column not in survey_df.columns
    ]
    if missing_columns:
        return (
            "Error: Survey data is missing column(s): "
            f"{', '.join(missing_columns)}."
        )

    target_name = employee_name.strip().lower()
    normalised_names = survey_df["Employee"].str.strip().str.lower()
    employee_rows = survey_df[normalised_names == target_name]
    if employee_rows.empty:
        return "Error: No survey data found for this employee."

    sentiment_counts = (
        employee_rows["Sentiment"].str.strip().str.lower().value_counts()
    )
    positive_count = sentiment_counts.get("positive", 0)
    negative_count = sentiment_counts.get("negative", 0)

    if positive_count > negative_count:
        return "Positive"
    if negative_count > positive_count:
        return "Negative"
    return "Neutral"