from collections import defaultdict
import datetime
import json
import math
import os
import pprint
import re
from typing import List, Dict, Optional

import numpy as np
import pandas as pd
from pydantic import BaseModel

# Kept as the final import, as in the original ordering, so whatever names
# this wildcard import provides still win over the imports above.
from UserProfile import *


class PVEntry(BaseModel):
    """A single assessment question attached to one personality value."""

    # Values read from the "Num" and "Value" spreadsheet columns.
    Num: int
    Value: str
    # Read from the "Assessment_Question" / "Assessment_Criteria" columns.
    Question: str
    Criteria: Optional[str]
    # Semicolon-separated "<range>: <description>" entries, e.g.
    # "1-10: low; 11-20: medium" (see PVAssessment.get_score_definition).
    Rating_Definition: Optional[str]
    # Lists parsed from comma-delimited spreadsheet cells.
    Adjacent_Values: Optional[List[str]]
    Opposite_Values: Optional[List[str]]
    # Initialised to None by PVAssessment.generate_personality_assessment.
    Answer: Optional[str]
    Score: Optional[int]
    Assessment: Optional[str]


def parse_values(value_str: Optional[str], delimiter: str = ",") -> List[str]:
    """
    Parses a delimited string into a list of trimmed, non-empty strings.

    Anything that is not a string (None, NaN, numbers, array-likes) yields an
    empty list. Empty tokens produced by leading, trailing or doubled
    delimiters are dropped, so "a, b," gives ["a", "b"].

    Args:
        value_str (Optional[str]): The input string to parse.
        delimiter (str): The delimiter to use for splitting. Defaults to ','.

    Returns:
        List[str]: A list of trimmed strings.
    """
    # Testing the type first makes a separate NaN/None check unnecessary and
    # avoids calling pd.isna on array-likes, whose result has no single
    # truth value.
    if not isinstance(value_str, str):
        return []

    tokens = (token.strip() for token in value_str.split(delimiter))
    return [token for token in tokens if token]


def extract_values_from_assessment_file(assessment_file):
    """
    Extracts and aggregates Value and Score pairs from an Excel file by summing scores.

    Rows with a missing Value or Score are discarded. Scores that are present
    but not numeric are coerced to NaN, which the grouped sum skips.

    Args:
        assessment_file (str): Path to the Excel file.

    Returns:
        list: A list of dictionaries with Value and total Score.

    Raises:
        ValueError: If the file lacks a 'Value' or 'Score' column.
    """
    df = pd.read_excel(assessment_file)

    missing_columns = [name for name in ("Value", "Score") if name not in df.columns]
    if missing_columns:
        raise ValueError("The file must contain 'Value' and 'Score' columns.")

    # Work on an explicit copy so the column assignment below never targets a
    # view of the frame that was read from disk.
    scores = df[["Value", "Score"]].dropna().copy()
    scores["Score"] = pd.to_numeric(scores["Score"], errors="coerce")

    aggregated = scores.groupby("Value", as_index=False).sum()
    return aggregated.to_dict(orient="records")


def _none_if_missing(cell):
    """Return None for a missing spreadsheet cell (None/NaN/NaT), else the cell unchanged."""
    if isinstance(cell, str):
        return cell
    return None if pd.isna(cell) else cell


class PVAssessment(BaseModel):
    """Collection of assessment entries grouped by personality value."""

    # Maps each value name to the PVEntry objects that share it.
    Entries: dict[str, list[PVEntry]]

    @staticmethod
    def generate_personality_assessment(personality_file):
        """
        Builds a PVAssessment from an Excel sheet of assessment questions.

        The sheet is read column by name: Num, Value, Assessment_Question,
        Assessment_Criteria, Rating_Definition, Adjacent_Values and
        Opposite_Values. Answer, Score and Assessment start out as None.

        Args:
            personality_file (str): Path to the Excel file.

        Returns:
            PVAssessment: Entries grouped under their "Value" cell.
        """
        df = pd.read_excel(personality_file)

        # defaultdict lets several rows accumulate under the same value.
        entries = defaultdict(list)
        for _, row in df.iterrows():
            pv_entry = PVEntry(
                Num=row["Num"],
                Value=row["Value"],
                Question=row["Assessment_Question"],
                # Empty cells arrive as float NaN, which is not a valid
                # Optional[str]; store them as None instead.
                Criteria=_none_if_missing(row["Assessment_Criteria"]),
                Rating_Definition=_none_if_missing(row["Rating_Definition"]),
                Adjacent_Values=parse_values(row["Adjacent_Values"]),
                Opposite_Values=parse_values(row["Opposite_Values"]),
                Answer=None,
                Score=None,
                Assessment=None,
            )
            entries[row["Value"]].append(pv_entry)

        return PVAssessment(Entries=dict(entries))

    @staticmethod
    def get_score_definition(value, score, pv_assessment):
        """
        Converts a numerical score (1-50) into a corresponding rating definition.

        The rating definition of the first entry stored for ``value`` is split
        on ';' into "<range>: <description>" items, where <range> is either a
        single integer or "<low>-<high>". The description of the first range
        containing ``score`` is returned. Every failure prints an error message
        and returns an empty string; malformed items are reported and skipped.

        Args:
            value (str): The personality value key.
            score (int): A numerical score between 1 and 50.
            pv_assessment (PVAssessment): The personality assessment object.

        Returns:
            str: The corresponding rating definition, or an empty string if not found.
        """
        if not isinstance(pv_assessment, PVAssessment):
            print("Error: Expected a PVAssessment object.")
            return ""

        if not isinstance(score, int) or score < 1 or score > 50:
            print(
                f"Error: Invalid score '{score}' for '{value}'. "
                "Expected a number between 1 and 50."
            )
            return ""

        entry_list = pv_assessment.Entries.get(value)
        if not isinstance(entry_list, list) or not entry_list:
            print(f"Error: No entries found for value '{value}'.")
            return ""

        # Only the first entry's rating definition is consulted.
        rating_definition = entry_list[0].Rating_Definition
        if not isinstance(rating_definition, str) or not rating_definition:
            print(f"Error: No valid rating definition found for '{value}'.")
            return ""

        for definition in parse_values(rating_definition, delimiter=";"):
            # Keep the try limited to parsing: a missing ':' or a
            # non-integer bound both surface as ValueError.
            try:
                range_part, description = definition.split(":", 1)
                range_part = range_part.strip()
                if "-" in range_part:
                    range_lower, range_upper = map(int, range_part.split("-"))
                else:
                    range_lower = range_upper = int(range_part)
            except ValueError:
                print(f"Error: Invalid rating definition format for '{value}': {definition}")
                continue

            if range_lower <= score <= range_upper:
                return description.strip()

        print(f"Error: No matching rating definition found for score {score} in '{value}'.")
        return ""