File size: 4,390 Bytes
441d880
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import re
import datetime
import textwrap

from Config import Config

import pandas as pd
import numpy as np

class DataDictionary:
    """
    Ordered collection of data dictionary entries.

    Each entry is a plain dict. The keys "Type", "Parameter" and "Description" must
    be supplied when an entry is added; any other keys (for example "Source",
    "ValidValues", "InferredLogic") are optional. Keys whose value is None or ""
    are dropped on insertion, so every accessor treats a missing key as "no value"
    instead of indexing the entry directly.
    """

    def __init__(self):
        """
        Initialize the DataDictionary instance with an empty list of entries.
        """
        self.entries = []

    def add_entry(self, entry):
        """
        Add an entry to the data dictionary.

        Args:
        entry (dict): Mapping that contains at least the keys "Type", "Parameter"
            and "Description". Additional keys are stored unchanged.

        Raises:
        ValueError: If any of the required keys is absent from ``entry``.

        Note:
        Only the presence of the required keys is checked. Keys whose value is
        None or "" (required or not) are removed from the stored copy, so a
        stored entry may lack any key. The caller's dict is not modified.
        """
        required_keys = {"Type", "Parameter", "Description"}
        missing = required_keys - entry.keys()
        if missing:
            raise ValueError(f"Missing required fields in entry: {missing}")

        # Store a cleaned copy: blank values carry no information.
        clean_entry = {k: v for k, v in entry.items() if v is not None and v != ""}
        self.entries.append(clean_entry)

    @staticmethod
    def _unique_in_order(values):
        """Return the non-None items of ``values`` without duplicates, first occurrence first."""
        return list(dict.fromkeys(value for value in values if value is not None))

    @staticmethod
    def _blank_to_none(value):
        """Map pandas missing-value markers (NaN, NaT, None) to None; return anything else as is."""
        return None if pd.isna(value) else value

    def get_types(self):
        """
        Extract all types defined for the data dictionary, preserving insertion order.

        Returns:
        list: A list of all unique types in the dictionary, preserving order.
            Entries stored without a "Type" are ignored.
        """
        return self._unique_in_order(entry.get("Type") for entry in self.entries)

    def get_parameters(self, type="All"):
        """
        Extract parameters of a particular type from the data dictionary, preserving insertion order.

        Args:
        type (str): Type of entries to return (defaults to "All", which matches
            every entry). The name shadows the builtin but is kept because it is
            part of the public signature.

        Returns:
        list: A list of all unique parameters matching the specified type, preserving order.
            Entries stored without a "Parameter" are ignored.
        """
        # .get() is required here: add_entry drops blank values, so "Type" or
        # "Parameter" may be absent from a stored entry.
        return self._unique_in_order(
            entry.get("Parameter")
            for entry in self.entries
            if type == "All" or entry.get("Type") == type
        )

    def get_columns(self):
        """
        Generate a list of column names in the format type_parameter.

        Returns:
        list: A list of column names preserving order. Entries whose "Type" or
            "Parameter" is missing or falsy are skipped. Duplicates are kept.
        """
        return [
            f"{entry['Type']}_{entry['Parameter']}"
            for entry in self.entries
            if entry.get("Type") and entry.get("Parameter")
        ]

    def filter_entries(self, Source=None, Type=None, Parameter=None):
        """
        Filter entries based on Source, Type, or Parameter.

        Args:
        Source (str, optional): The source to filter by.
        Type (str, optional): The type to filter by.
        Parameter (str, optional): The parameter to filter by.

        Returns:
        list: A list of entries matching all given criteria. A criterion left as
            None is not applied. An entry that lacks a key being filtered on
            never matches that criterion.
        """
        return [
            entry for entry in self.entries
            if (Source is None or entry.get("Source") == Source) and
               (Type is None or entry.get("Type") == Type) and
               (Parameter is None or entry.get("Parameter") == Parameter)
        ]

    @staticmethod
    def generate_dictionary(data_dictionary_file):
        """
        Static method to generate a DataDictionary instance from an Excel (.xlsx) file.

        The sheet must have "Type", "Parameter" and "Description" columns. The
        "Source", "Scoring_Method" and "Inferred_Logic" columns are optional.
        Empty cells (read by pandas as NaN) are treated as missing values.

        Args:
        data_dictionary_file (str): The path to the Excel file containing data dictionary entries.

        Returns:
        DataDictionary: A populated DataDictionary instance.

        Raises:
        KeyError: If one of the three mandatory columns is absent from the sheet.
        """
        df = pd.read_excel(data_dictionary_file)
        blank_to_none = DataDictionary._blank_to_none

        data_dictionary = DataDictionary()

        for _, row in df.iterrows():
            # Normalize NaN to None so add_entry's blank-value filter removes it;
            # NaN is neither None nor "" and would otherwise be stored as a value.
            data_dictionary.add_entry({
                "Type": blank_to_none(row["Type"]),
                "Parameter": blank_to_none(row["Parameter"]),
                "Description": blank_to_none(row["Description"]),
                "Source": blank_to_none(row.get("Source")),
                # NOTE(review): "ValidValues" is filled from the "Scoring_Method"
                # column - confirm this mapping is intended.
                "ValidValues": blank_to_none(row.get("Scoring_Method")),
                "InferredLogic": blank_to_none(row.get("Inferred_Logic")),
            })

        return data_dictionary

    def __repr__(self):
        """Return a short summary showing how many entries are stored."""
        return f"DataDictionary({len(self.entries)} entries)"