Spaces:
Sleeping
Sleeping
| import re | |
| import datetime | |
| import textwrap | |
| from common.Config import Config | |
| import pandas as pd | |
| import numpy as np | |
class DataDictionary:
    """
    In-memory, insertion-ordered collection of data dictionary entries.

    Each entry is a plain dict with the keys "Type", "Parameter",
    "Description", "Source", "ValidValues" and "InferredLogic".
    """

    def __init__(self):
        """
        Initialize the DataDictionary instance with an empty list of entries.
        """
        self.entries = []

    def add_entry(self, Type, Parameter, Description, Source, ValidValues, InferredLogic=None):
        """
        Append one entry to the data dictionary.

        Args:
            Type (str): The type of the entry (e.g., "Numeric", "Categorical").
            Parameter (str): The parameter name.
            Description (str): A brief description of the parameter.
            Source (str): The source of the parameter.
            ValidValues (str): Valid values or scoring method for the parameter.
            InferredLogic (str, optional): Inferred logic for the parameter.
        """
        self.entries.append(
            {
                "Type": Type,
                "Parameter": Parameter,
                "Description": Description,
                "Source": Source,
                "ValidValues": ValidValues,
                "InferredLogic": InferredLogic,
            }
        )

    def get_types(self):
        """
        Return every distinct entry type, in first-seen order.

        Entries whose "Type" is missing or None are skipped.

        Returns:
            list: Unique types in insertion order.
        """
        # dict.fromkeys de-duplicates while keeping first-seen order.
        found = (entry.get("Type") for entry in self.entries)
        return list(dict.fromkeys(t for t in found if t is not None))

    def get_parameters(self, type="All"):
        """
        Return the distinct parameter names of one type, in first-seen order.

        Args:
            type (str): Type of entries to return; "All" (the default)
                selects every entry. The name shadows the builtin but is
                kept because callers may pass it by keyword.

        Returns:
            list: Unique parameters matching the requested type, in insertion order.
        """
        return list(
            dict.fromkeys(
                entry["Parameter"]
                for entry in self.entries
                if type == "All" or entry["Type"] == type
            )
        )

    def get_columns(self):
        """
        Build column names of the form "<Type>_<Parameter>", one per entry.

        Entries with a falsy Type or Parameter are skipped. Duplicates are
        NOT removed: the result has one name per qualifying entry.

        Returns:
            list: Column names in insertion order.
        """
        return [
            f"{entry['Type']}_{entry['Parameter']}"
            for entry in self.entries
            if entry["Type"] and entry["Parameter"]
        ]

    def filter_entries(self, Source=None, Type=None, Parameter=None):
        """
        Select the entries that match every criterion that was supplied.

        A criterion left as None is not applied.

        Args:
            Source (str, optional): The source to filter by.
            Type (str, optional): The type to filter by.
            Parameter (str, optional): The parameter to filter by.

        Returns:
            list: The matching entry dicts (the stored objects, not copies).
        """
        return [
            entry
            for entry in self.entries
            if (Source is None or entry["Source"] == Source)
            and (Type is None or entry["Type"] == Type)
            and (Parameter is None or entry["Parameter"] == Parameter)
        ]

    @staticmethod
    def generate_dictionary(data_dictionary_file):
        """
        Build a DataDictionary from an Excel (.xlsx) file.

        The sheet must provide the columns 'Type', 'Parameter',
        'Description', 'Source' and 'Scoring method'. 'Inferred_Logic' is
        optional; when the column is absent, InferredLogic is None.
        Cell values are stored exactly as pandas returns them.
        NOTE(review): blank cells presumably arrive as NaN rather than
        None - confirm whether callers expect that.

        Args:
            data_dictionary_file (str): The path to the Excel file containing data dictionary entries.

        Returns:
            DataDictionary: A populated DataDictionary instance.
        """
        # Local import keeps this method usable on its own, independent of
        # whatever the enclosing module imports.
        import pandas as pd

        frame = pd.read_excel(data_dictionary_file)
        data_dictionary = DataDictionary()
        for _, row in frame.iterrows():
            data_dictionary.add_entry(
                Type=row["Type"],
                Parameter=row["Parameter"],
                Description=row["Description"],
                Source=row["Source"],
                ValidValues=row["Scoring method"],
                InferredLogic=row.get("Inferred_Logic"),
            )
        return data_dictionary

    def __repr__(self):
        """Return a short summary showing how many entries are stored."""
        return f"DataDictionary({len(self.entries)} entries)"