# utils.py
import glob
import os
import re

import pandas as pd

# Default data locations and the year used as "now" when computing experience.
EMPLOYEE_DIRECTORY = "hrdataset/employees"
SURVEY_FILE_PATH = "hrdataset/surveys/Employee_Culture_Survey_Responses.csv"
REFERENCE_YEAR = 2024

_NAME_RE = re.compile(r"\*\*Name:\*\* (.+)")
_ROLE_RE = re.compile(r"\*\*Role:\*\* (.+)")
_JOINING_DATE_RE = re.compile(r"\*\*Joining Date:\*\* (\d{4}-\d{2}-\d{2})")
# Accept only a well-formed decimal so trailing punctuation ("4.5.") is not
# captured and later rejected by float().
_RATING_RE = re.compile(r"\*\*(\d{4}):\*\* (\d+(?:\.\d+)?|\.\d+)")


def _first_group(pattern, text):
    """Return the stripped first capture group of *pattern* in *text*, or None."""
    match = pattern.search(text)
    return match.group(1).strip() if match else None


def extract_employee_data(
    employee_name,
    *,
    directory=EMPLOYEE_DIRECTORY,
    reference_year=REFERENCE_YEAR,
):
    """Extract employee details from the employee's Markdown file.

    The file is looked up in *directory* as ``*_<name>.md`` where spaces in
    *employee_name* are replaced by underscores.

    Args:
        employee_name: Employee name used to locate the file.
        directory: Folder holding the employee Markdown files.
        reference_year: Year from which the joining year is subtracted to
            compute ``experience``. Defaults to 2024 (the value previously
            hard-coded); pass the current year for up-to-date figures.

    Returns:
        On success, a dict with the keys ``name``, ``role``, ``joining_date``
        and ``experience``; a field that is not present in the file is
        ``None`` (``experience`` is ``None`` when the joining date is
        missing). The key ``rating`` (float, rating of the latest year) is
        added only when at least one yearly rating is found.
        If no file matches, the string
        ``"Error: Employee file for <name> not found."`` is returned.
    """
    # Escape glob metacharacters so a name containing "[", "*" or "?" is
    # matched literally instead of being interpreted as a pattern.
    file_stem = glob.escape(employee_name.replace(" ", "_"))
    search_pattern = f"{glob.escape(directory)}/*_{file_stem}.md"

    # glob order is filesystem-dependent; sort so the same file is always
    # chosen when several match.
    # NOTE(review): "*_<name>" also matches files whose name merely ends with
    # <name> (e.g. a surname-only query) - confirm the file naming scheme.
    matching_files = sorted(glob.glob(search_pattern))
    if not matching_files:
        return f"Error: Employee file for {employee_name} not found."

    with open(matching_files[0], "r", encoding="utf-8") as file:
        content = file.read()

    details = {
        "name": _first_group(_NAME_RE, content),
        "role": _first_group(_ROLE_RE, content),
        "joining_date": _first_group(_JOINING_DATE_RE, content),
    }

    # Experience is measured in whole calendar years from the joining year.
    joining_date = details["joining_date"]
    if joining_date is None:
        details["experience"] = None
    else:
        details["experience"] = reference_year - int(joining_date.split("-")[0])

    # Keep the rating of the most recent year (last one wins on duplicates).
    ratings = _RATING_RE.findall(content)
    if ratings:
        latest_rating = sorted(ratings, key=lambda item: int(item[0]))[-1]
        details["rating"] = float(latest_rating[1])

    return details


def get_survey_sentiment(employee_name, *, survey_file_path=SURVEY_FILE_PATH):
    """Return the overall survey sentiment recorded for an employee.

    Rows of the survey CSV whose ``Employee`` column equals *employee_name*
    (ignoring case and surrounding whitespace) are selected, and the
    ``positive`` and ``negative`` values of their ``Sentiment`` column are
    counted, also ignoring case and surrounding whitespace.

    Args:
        employee_name: Employee name to look up.
        survey_file_path: Path of the survey CSV file.

    Returns:
        ``"Positive"`` or ``"Negative"`` for whichever count is larger,
        ``"Neutral"`` on a tie, or an ``"Error: ..."`` string when the file
        does not exist or contains no row for the employee.
    """
    if not os.path.exists(survey_file_path):
        return "Error: Survey data not found."

    # Read every column as text so the .str accessor works even when a column
    # would otherwise be inferred as numeric or all-NaN.
    survey_df = pd.read_csv(survey_file_path, dtype=str)

    wanted = employee_name.strip().lower()
    employees = survey_df["Employee"].str.strip().str.lower()
    filtered_df = survey_df[employees == wanted]
    if filtered_df.empty:
        return "Error: No survey data found for this employee."

    sentiment_counts = (
        filtered_df["Sentiment"].str.strip().str.lower().value_counts()
    )
    positive_count = sentiment_counts.get("positive", 0)
    negative_count = sentiment_counts.get("negative", 0)

    if positive_count > negative_count:
        return "Positive"
    if negative_count > positive_count:
        return "Negative"
    return "Neutral"