student_sample_panel / common /PersonalityValues.py
elaineaishophouse's picture
Upload 15 files
441d880 verified
raw
history blame
5.24 kB
from pydantic import BaseModel
from typing import List, Dict, Optional
from collections import defaultdict
import datetime
import json
import os
import pandas as pd
import re
import numpy as np
import pprint
import math
from UserProfile import *
class PVEntry(BaseModel):
    """One assessment question belonging to a single personality value.

    Instances are built from spreadsheet rows by
    ``PVAssessment.generate_personality_assessment``. Only ``Num``, ``Value``
    and ``Question`` are required; every optional field defaults to ``None``
    so the model can be constructed the same way under pydantic v1 (which
    defaulted ``Optional`` fields implicitly) and pydantic v2 (which treats
    an ``Optional`` annotation without a default as a required argument).
    """

    # Populated from the "Num", "Value" and "Assessment_Question" columns.
    Num: int
    Value: str
    Question: str
    # Populated from the "Assessment_Criteria" column.
    Criteria: Optional[str] = None
    # ';'-separated "range: description" entries, as parsed by
    # PVAssessment.get_score_definition.
    Rating_Definition: Optional[str] = None
    # Lists parsed from the comma-separated "Adjacent_Values" and
    # "Opposite_Values" columns.
    Adjacent_Values: Optional[List[str]] = None
    Opposite_Values: Optional[List[str]] = None
    # Left as None when the entry is loaded from the spreadsheet;
    # presumably filled in once the question has been answered and scored.
    Answer: Optional[str] = None
    Score: Optional[int] = None
    Assessment: Optional[str] = None
def parse_values(value_str: Optional[str], delimiter: str = ",") -> List[str]:
    """
    Parses a delimited string into a list of trimmed, non-empty strings.

    Anything that is not a string (None, NaN from an empty spreadsheet cell,
    numbers, lists, arrays, ...) yields an empty list.

    Args:
        value_str (Optional[str]): The input string to parse.
        delimiter (str): The delimiter to use for splitting. Defaults to ','.

    Returns:
        List[str]: The trimmed items in their original order. Items that are
        empty after trimming (e.g. from a trailing or doubled delimiter) are
        dropped.
    """
    # The type check alone covers None/NaN, and unlike calling pd.isna()
    # first it cannot raise on list/array input (whose element-wise result
    # has no single truth value).
    if not isinstance(value_str, str):
        return []
    pieces = (piece.strip() for piece in value_str.split(delimiter))
    return [piece for piece in pieces if piece]
def extract_values_from_assessment_file(assessment_file):
    """
    Extracts and aggregates Value and Score pairs from an Excel file by summing scores.

    Rows with a missing Value, or with a Score that is missing or not
    numeric, are ignored. A Value that has no usable score at all is
    therefore omitted from the result instead of being reported with a
    total of 0.

    Args:
        assessment_file (str): Path to the Excel file.

    Returns:
        list: A list of dictionaries with Value and total Score.

    Raises:
        ValueError: If the sheet lacks a 'Value' or a 'Score' column.
    """
    # Read the Excel file
    df = pd.read_excel(assessment_file)

    # Ensure required columns are present
    if "Value" not in df.columns or "Score" not in df.columns:
        raise ValueError("The file must contain 'Value' and 'Score' columns.")

    # Work on an explicit copy so the column assignment below never writes
    # into (or warns about) a view of the frame that was read.
    scores = df[["Value", "Score"]].copy()

    # Coerce BEFORE dropping: non-numeric scores turn into NaN here, and the
    # dropna() that follows removes them together with the blank cells.
    scores["Score"] = pd.to_numeric(scores["Score"], errors="coerce")
    scores = scores.dropna()

    # Group by Value and sum the scores
    aggregated = scores.groupby("Value", as_index=False)["Score"].sum()

    # Convert to list of dictionaries
    return aggregated.to_dict(orient="records")
class PVAssessment(BaseModel):
    """Collection of personality-value questions grouped by value name.

    ``Entries`` maps each value name to the list of ``PVEntry`` objects
    loaded for it, in the order the rows were read.
    """

    Entries: dict[str, list[PVEntry]]

    @staticmethod
    def generate_personality_assessment(personality_file):
        """
        Builds a PVAssessment from an Excel sheet of assessment questions.

        The sheet must provide the columns "Num", "Value",
        "Assessment_Question", "Assessment_Criteria", "Rating_Definition",
        "Adjacent_Values" and "Opposite_Values". Each row becomes one
        PVEntry whose Answer, Score and Assessment are left unset.

        Args:
            personality_file (str): Path to the Excel file.

        Returns:
            PVAssessment: Entries grouped by the row's "Value".
        """
        df = pd.read_excel(personality_file)

        def text_or_none(cell):
            # Blank cells are read as NaN (a float), which is not a valid
            # value for an Optional[str] field; store them as None instead.
            return None if pd.isna(cell) else cell

        # Use defaultdict to allow appending multiple PVEntries per value
        entries = defaultdict(list)
        for _, row in df.iterrows():
            pv_entry = PVEntry(
                Num=row["Num"],
                Value=row["Value"],
                Question=row["Assessment_Question"],
                Criteria=text_or_none(row["Assessment_Criteria"]),
                Rating_Definition=text_or_none(row["Rating_Definition"]),
                Adjacent_Values=parse_values(row["Adjacent_Values"]),
                Opposite_Values=parse_values(row["Opposite_Values"]),
                Answer=None,
                Score=None,
                Assessment=None,
            )
            entries[row["Value"]].append(pv_entry)
        return PVAssessment(Entries=dict(entries))

    @staticmethod
    def get_score_definition(value, score, pv_assessment):
        """
        Converts a numerical score (1-50) into a corresponding rating definition.

        The rating definition of the first entry stored for ``value`` is read
        as ';'-separated items of the form "LOW-HIGH: description" or
        "N: description"; the description of the first item whose range
        contains the score is returned. Problems are reported with ``print``
        and signalled by an empty return value rather than an exception.

        Args:
            value (str): The personality value key.
            score (int): A numerical score between 1 and 50. numpy integers
                and whole-number floats (e.g. 12.0 from a pandas sum) are
                accepted as well.
            pv_assessment (PVAssessment): The personality assessment object.

        Returns:
            str: The corresponding rating definition, or an empty string if not found.
        """
        if not isinstance(pv_assessment, PVAssessment):
            print("Error: Expected a PVAssessment object.")
            return ""

        # Aggregated scores commonly arrive as floats or numpy integers;
        # treat whole-number values like the equivalent int.
        if isinstance(score, float) and score.is_integer():
            numeric_score = int(score)
        else:
            numeric_score = score
        if not isinstance(numeric_score, (int, np.integer)) or not 1 <= numeric_score <= 50:
            print(f"Error: Invalid score '{score}' for '{value}'. Expected a number between 1 and 50.")
            return ""

        entry_list = pv_assessment.Entries.get(value)
        if not entry_list or not isinstance(entry_list, list):
            print(f"Error: No entries found for value '{value}'.")
            return ""

        # Use the first PVEntry in the list
        rating_definition = entry_list[0].Rating_Definition
        if not isinstance(rating_definition, str) or not rating_definition:
            print(f"Error: No valid rating definition found for '{value}'.")
            return ""

        # Find the corresponding description based on the score range
        for definition in parse_values(rating_definition, delimiter=";"):
            if not definition:
                # Blank fragment (e.g. after a trailing ';'): nothing to parse.
                continue
            try:
                range_part, description = definition.split(":", 1)
                range_part = range_part.strip()
                if "-" in range_part:
                    range_lower, range_upper = map(int, range_part.split("-"))
                else:
                    range_lower = range_upper = int(range_part)
            except ValueError:
                # Missing ':' separator or non-integer bounds.
                print(f"Error: Invalid rating definition format for '{value}': {definition}")
                continue
            if range_lower <= numeric_score <= range_upper:
                return description.strip()

        print(f"Error: No matching rating definition found for score {score} in '{value}'.")
        return ""