Spaces:
Build error
Build error
| import re | |
| import datetime | |
| import textwrap | |
| from Config import Config | |
| import pandas as pd | |
| import numpy as np | |
class DataDictionary:
    """Ordered collection of data-dictionary entries.

    Each entry is a dict that was supplied with at least the keys "Type",
    "Parameter" and "Description". Keys whose value is None or "" are dropped
    when the entry is stored, so every reader in this class uses ``dict.get``
    and treats an absent key as "no value".
    """

    def __init__(self):
        """Initialize the DataDictionary instance with an empty list of entries."""
        self.entries = []

    def add_entry(self, entry):
        """Add an entry to the data dictionary.

        Args:
            entry (dict): Must contain the keys "Type", "Parameter" and
                "Description"; any other keys are stored as given.

        Raises:
            ValueError: If one of the required keys is absent from ``entry``.
        """
        required_keys = {"Type", "Parameter", "Description"}
        missing = required_keys - entry.keys()
        if missing:
            raise ValueError(f"Missing required fields in entry: {missing}")
        # Only key *presence* is validated above; None / "" values (including
        # those of required keys) are dropped here, so stored entries may lack
        # any key.
        clean_entry = {k: v for k, v in entry.items() if v is not None and v != ""}
        self.entries.append(clean_entry)

    def get_types(self):
        """Return every distinct "Type" value, in first-seen order.

        Returns:
            list: Unique types; entries without a "Type" are skipped.
        """
        types = (entry.get("Type") for entry in self.entries)
        # dict.fromkeys de-duplicates while keeping insertion order.
        return list(dict.fromkeys(t for t in types if t is not None))

    def get_parameters(self, type="All"):
        """Return distinct "Parameter" values, in first-seen order.

        Args:
            type (str): Only entries whose "Type" equals this value are
                considered; "All" (the default) disables the filter. The name
                shadows the builtin but is kept for caller compatibility.

        Returns:
            list: Unique parameters of the matching entries; entries without a
            "Parameter" are skipped.
        """
        parameters = (
            entry.get("Parameter")
            for entry in self.entries
            if type == "All" or entry.get("Type") == type
        )
        return list(dict.fromkeys(p for p in parameters if p is not None))

    def get_columns(self):
        """Generate column names in the format ``<Type>_<Parameter>``.

        Returns:
            list: One name per entry that has both a truthy "Type" and a
            truthy "Parameter", in entry order (duplicates are kept).
        """
        columns = []
        for entry in self.entries:
            entry_type = entry.get("Type")
            parameter = entry.get("Parameter")
            if entry_type and parameter:
                columns.append(f"{entry_type}_{parameter}")
        return columns

    def filter_entries(self, Source=None, Type=None, Parameter=None):
        """Filter entries based on Source, Type, and/or Parameter.

        A criterion left as None is ignored. An entry that lacks the key for
        an active criterion simply does not match.

        Args:
            Source (str, optional): The source to filter by.
            Type (str, optional): The type to filter by.
            Parameter (str, optional): The parameter to filter by.

        Returns:
            list: The stored entry dicts matching all active criteria.
        """
        criteria = {
            key: wanted
            for key, wanted in (("Source", Source), ("Type", Type), ("Parameter", Parameter))
            if wanted is not None
        }
        return [
            entry for entry in self.entries
            if all(entry.get(key) == wanted for key, wanted in criteria.items())
        ]

    @staticmethod
    def generate_dictionary(data_dictionary_file):
        """Build a DataDictionary from an Excel (.xlsx) file.

        The sheet must have "Type", "Parameter" and "Description" columns.
        The optional columns "Source", "Scoring_Method" and "Inferred_Logic"
        are stored under the entry keys "Source", "ValidValues" and
        "InferredLogic" respectively.

        Args:
            data_dictionary_file (str): Path to the Excel file.

        Returns:
            DataDictionary: A populated DataDictionary instance.
        """
        import pandas as pd  # local import keeps this method self-contained

        required_columns = ("Type", "Parameter", "Description")
        optional_columns = {
            "Source": "Source",
            "ValidValues": "Scoring_Method",
            "InferredLogic": "Inferred_Logic",
        }

        def _none_if_missing(value):
            # Blank Excel cells arrive as NaN, which add_entry would keep;
            # map them to None so they are filtered like any empty value.
            return None if pd.isna(value) else value

        df = pd.read_excel(data_dictionary_file)
        data_dictionary = DataDictionary()
        for _, row in df.iterrows():
            # row[...] (not .get) so a missing required column still fails loudly.
            entry = {name: _none_if_missing(row[name]) for name in required_columns}
            entry.update(
                (key, _none_if_missing(row.get(column)))
                for key, column in optional_columns.items()
            )
            data_dictionary.add_entry(entry)
        return data_dictionary

    def __repr__(self):
        """Return a short summary showing the number of stored entries."""
        return f"DataDictionary({len(self.entries)} entries)"