Spaces:

nat232
/

student_sample_panel

Build error

File size: 5,239 Bytes

441d880

from pydantic import BaseModel
from typing import List, Dict, Optional
from collections import defaultdict

import datetime
import json
import os
import pandas as pd
import re
import numpy as np
import pprint
import math

from UserProfile import *

class PVEntry(BaseModel):
    """
    One assessment row for a single personality value.

    Instances are built by PVAssessment.generate_personality_assessment from
    the rows of a spreadsheet; the comments below name the column each field
    is filled from there.
    """
    # Filled from the "Num" column.
    Num: int
    # Filled from the "Value" column; also the key entries are grouped under
    # in PVAssessment.Entries.
    Value: str
    # Filled from the "Assessment_Question" column.
    Question: str
    # Filled from the "Assessment_Criteria" column.
    Criteria: Optional[str]
    # Filled from the "Rating_Definition" column. get_score_definition parses
    # it as ";"-separated "<range>: <description>" items, where <range> is
    # either "low-high" or a single integer.
    Rating_Definition: Optional[str]
    # Lists produced by parse_values() from the comma-delimited
    # "Adjacent_Values" / "Opposite_Values" columns.
    Adjacent_Values: Optional[List[str]]
    Opposite_Values: Optional[List[str]]
    # Set to None when the entry is generated from the spreadsheet.
    # NOTE(review): presumably filled in later by the code that runs the
    # assessment -- confirm against the callers.
    Answer: Optional[str]
    Score: Optional[int]
    Assessment: Optional[str]

def parse_values(value_str: Optional[str], delimiter: str = ",") -> List[str]:
    """
    Split a delimited string into a list of whitespace-trimmed strings.

    Anything that is not a string (None, NaN, pd.NA, numbers, lists, arrays)
    yields an empty list. Empty segments are preserved, so "a,,b" gives
    ["a", "", "b"] and "" gives [""].

    Args:
        value_str (Optional[str]): The input string to parse.
        delimiter (str): The delimiter to use for splitting. Defaults to ','.

    Returns:
        List[str]: A list of trimmed strings.
    """
    # Check the type before anything else: calling pd.isna() on a list or
    # array returns an element-wise array whose truth value is ambiguous and
    # raises ValueError. Every missing-value marker (None, NaN, pd.NA) is a
    # non-string, so this single test also covers the "missing" case.
    if not isinstance(value_str, str):
        return []
    return [segment.strip() for segment in value_str.split(delimiter)]
      

def extract_values_from_assessment_file(assessment_file):
    """
    Read an assessment workbook and total the scores recorded for each value.

    Rows with a missing Value, a missing Score, or a Score that cannot be
    parsed as a number are ignored, so they neither contribute to a total nor
    create a zero-valued entry of their own.

    Args:
        assessment_file (str): Path to the Excel file. It must contain
            "Value" and "Score" columns.

    Returns:
        list: One dictionary per distinct Value, of the form
        {"Value": <value>, "Score": <summed score>}, ordered by Value.

    Raises:
        ValueError: If the "Value" or "Score" column is missing.
    """
    df = pd.read_excel(assessment_file)

    # Ensure required columns are present
    if "Value" not in df.columns or "Score" not in df.columns:
        raise ValueError("The file must contain 'Value' and 'Score' columns.")

    # Coerce scores to numbers *before* dropping incomplete rows, so that
    # unparseable scores (turned into NaN by errors="coerce") are removed
    # together with the genuinely empty cells. assign() builds a new frame
    # instead of writing into a slice of another one, which avoids pandas'
    # chained-assignment warning.
    scores = (
        df[["Value", "Score"]]
        .assign(Score=pd.to_numeric(df["Score"], errors="coerce"))
        .dropna()
    )

    # Group by Value and sum the scores
    totals = scores.groupby("Value", as_index=False).sum()

    # Convert to list of dictionaries
    return totals.to_dict(orient="records")

class PVAssessment(BaseModel):
    """
    Personality-value assessment: PVEntry objects grouped by value name.

    Entries maps each value name to the list of PVEntry rows that share it.
    """
    Entries: dict[str, list[PVEntry]]

    @staticmethod
    def generate_personality_assessment(personality_file) -> "PVAssessment":
        """
        Build a PVAssessment from a personality-definition workbook.

        Args:
        personality_file (str): Path to an Excel file (passed to
            pandas.read_excel) with the columns Num, Value,
            Assessment_Question, Assessment_Criteria, Rating_Definition,
            Adjacent_Values and Opposite_Values.

        Returns:
        PVAssessment: One PVEntry per spreadsheet row, grouped by the row's
            Value. Answer, Score and Assessment are left as None.
        """
        def text_or_none(cell):
            # Blank spreadsheet cells arrive as NaN, which is not a valid
            # value for an Optional[str] field; store None instead.
            return None if pd.isna(cell) else cell

        df = pd.read_excel(personality_file)

        # Use defaultdict to allow appending multiple PVEntries per value
        entries = defaultdict(list)

        for _, row in df.iterrows():
            pv_entry = PVEntry(
                Num=row["Num"],
                Value=row["Value"],
                Question=row["Assessment_Question"],
                Criteria=text_or_none(row["Assessment_Criteria"]),
                Rating_Definition=text_or_none(row["Rating_Definition"]),
                Adjacent_Values=parse_values(row["Adjacent_Values"]),
                Opposite_Values=parse_values(row["Opposite_Values"]),
                Answer=None,
                Score=None,
                Assessment=None
            )
            entries[row["Value"]].append(pv_entry)

        return PVAssessment(Entries=dict(entries))

    @staticmethod
    def get_score_definition(value, score, pv_assessment) -> str:
        """
        Converts a numerical score (1-50) into a corresponding rating definition.

        The rating definition of the first entry stored for `value` is read as
        ";"-separated "<range>: <description>" items, where <range> is either
        "low-high" or a single integer. The description of the first range
        containing the score is returned. Problems are reported with print()
        and result in an empty string; nothing is raised.

        Args:
        value (str): The personality value key.
        score (int): A whole-number score between 1 and 50. Python ints,
            numpy integers and integer-valued floats (e.g. 25.0) are accepted.
        pv_assessment (PVAssessment): The personality assessment object.

        Returns:
        str: The corresponding rating definition, or an empty string if not found.
        """
        if not isinstance(pv_assessment, PVAssessment):
            print("Error: Expected a PVAssessment object.")
            return ""

        # Scores that come out of pandas are usually numpy integers or floats
        # such as 25.0; treat those as the whole numbers they represent
        # instead of rejecting them as "not an int".
        whole_score = score
        if isinstance(score, (float, np.floating)) and float(score).is_integer():
            whole_score = int(score)

        if (
            not isinstance(whole_score, (int, np.integer))
            or whole_score < 1
            or whole_score > 50
        ):
            print(f"Error: Invalid score '{score}' for '{value}'. Expected a number between 1 and 50.")
            return ""

        entry_list = pv_assessment.Entries.get(value)
        if not entry_list or not isinstance(entry_list, list):
            print(f"Error: No entries found for value '{value}'.")
            return ""

        # Use the first PVEntry in the list
        pv_entry = entry_list[0]

        rating_definition = pv_entry.Rating_Definition
        if not isinstance(rating_definition, str) or not rating_definition:
            print(f"Error: No valid rating definition found for '{value}'.")
            return ""

        rating_definition_list = parse_values(rating_definition, delimiter=";")

        # Find the corresponding description based on the score range
        for definition in rating_definition_list:
            if not definition:
                # An empty segment (e.g. from a trailing ";") is not a
                # malformed definition; skip it without reporting an error.
                continue

            try:
                range_part, description = definition.split(":", 1)
                range_part = range_part.strip()

                if "-" in range_part:
                    range_lower, range_upper = map(int, range_part.split("-"))
                else:
                    range_lower = range_upper = int(range_part)
            except ValueError:
                # Raised for a missing ":", a non-numeric bound, or a range
                # with more than two parts.
                print(f"Error: Invalid rating definition format for '{value}': {definition}")
                continue

            if range_lower <= whole_score <= range_upper:
                return description.strip()

        print(f"Error: No matching rating definition found for score {score} in '{value}'.")
        return ""