# File size: 4,698 Bytes
# f71ff04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import re
import datetime
import textwrap

from common.Config import Config

import pandas as pd
import numpy as np

class DataDictionary:
    """Ordered collection of data-dictionary entries.

    Each entry is a plain ``dict`` with the keys ``Type``, ``Parameter``,
    ``Description``, ``Source``, ``ValidValues`` and ``InferredLogic``.
    Entries are stored in insertion order in ``self.entries``.
    """

    def __init__(self):
        """Initialize the DataDictionary instance with an empty list of entries."""
        self.entries = []

    def add_entry(self, Type, Parameter, Description, Source, ValidValues, InferredLogic=None):
        """Append one entry to the data dictionary.

        Args:
            Type (str): The type of the entry (e.g., "Numeric", "Categorical").
            Parameter (str): The parameter name.
            Description (str): A brief description of the parameter.
            Source (str): The source of the parameter.
            ValidValues (str): Valid values or scoring method for the parameter.
            InferredLogic (str, optional): Inferred logic for the parameter.
        """
        self.entries.append(
            {
                "Type": Type,
                "Parameter": Parameter,
                "Description": Description,
                "Source": Source,
                "ValidValues": ValidValues,
                "InferredLogic": InferredLogic,
            }
        )

    def get_types(self) -> list:
        """Return every distinct entry type, in first-seen order.

        Entries whose ``Type`` is ``None`` (or missing) are skipped.

        Returns:
            list: The unique types in the dictionary, preserving order.
        """
        types = (entry.get("Type") for entry in self.entries)
        # dict.fromkeys de-duplicates while keeping insertion order.
        return list(dict.fromkeys(t for t in types if t is not None))

    def get_parameters(self, type="All") -> list:
        """Return the distinct parameter names of one type, in first-seen order.

        Args:
            type (str): Type of entries to return; the default "All" disables
                the type filter. (The name shadows the builtin but is kept
                because callers may pass it by keyword.)

        Returns:
            list: The unique parameters matching the specified type,
            preserving order.
        """
        matching = (
            entry["Parameter"]
            for entry in self.entries
            if type == "All" or entry["Type"] == type
        )
        return list(dict.fromkeys(matching))

    def get_columns(self) -> list:
        """Generate column names in the format ``<Type>_<Parameter>``.

        Entries with a falsy ``Type`` or ``Parameter`` (``None``, empty
        string, ...) are skipped. Duplicates are NOT removed: one name is
        produced per qualifying entry.

        Returns:
            list: Column names in entry order.
        """
        return [
            f"{entry['Type']}_{entry['Parameter']}"
            for entry in self.entries
            if entry["Type"] and entry["Parameter"]
        ]

    def filter_entries(self, Source=None, Type=None, Parameter=None) -> list:
        """Return the entries matching every supplied criterion.

        A criterion left as ``None`` is not applied; the supplied ones are
        combined with logical AND.

        Args:
            Source (str, optional): The source to filter by.
            Type (str, optional): The type to filter by.
            Parameter (str, optional): The parameter to filter by.

        Returns:
            list: The matching entry dicts (the stored objects, not copies),
            in entry order.
        """
        return [
            entry for entry in self.entries
            if (Source is None or entry["Source"] == Source) and
               (Type is None or entry["Type"] == Type) and
               (Parameter is None or entry["Parameter"] == Parameter)
        ]

    @staticmethod
    def _clean_cell(value):
        """Map a pandas missing-value marker (NaN, NaT, pd.NA, None) to None.

        Non-scalar values are returned untouched because ``pd.isna`` would
        yield an element-wise array for them rather than a single bool.
        """
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return None
        return value

    @classmethod
    def from_dataframe(cls, df):
        """Build a DataDictionary from a DataFrame, one entry per row.

        The frame must provide the columns ``Type``, ``Parameter``,
        ``Description``, ``Source`` and ``Scoring method``; the column
        ``Inferred_Logic`` is optional. Missing cells are stored as ``None``
        rather than NaN: NaN is truthy and is not ``None``, so it would
        otherwise be reported by ``get_types`` and produce ``nan_<Parameter>``
        names in ``get_columns``.

        Args:
            df (pandas.DataFrame): Table of data dictionary entries.

        Returns:
            DataDictionary: A populated instance.

        Raises:
            KeyError: If a required column is absent and the frame has rows.
        """
        clean = cls._clean_cell
        data_dictionary = cls()
        for _, row in df.iterrows():
            data_dictionary.add_entry(
                Type=clean(row['Type']),
                Parameter=clean(row['Parameter']),
                Description=clean(row['Description']),
                Source=clean(row['Source']),
                ValidValues=clean(row['Scoring method']),
                # Series.get returns None when the optional column is absent.
                InferredLogic=clean(row.get('Inferred_Logic')),
            )
        return data_dictionary

    @staticmethod
    def generate_dictionary(data_dictionary_file):
        """Generate a DataDictionary instance from an Excel (.xlsx) file.

        The file is read with ``pandas.read_excel`` and converted with
        ``from_dataframe``; empty cells therefore end up as ``None``.

        Args:
            data_dictionary_file (str): The path to the Excel file containing
                data dictionary entries.

        Returns:
            DataDictionary: A populated DataDictionary instance.
        """
        return DataDictionary.from_dataframe(pd.read_excel(data_dictionary_file))

    def __repr__(self) -> str:
        return f"DataDictionary({len(self.entries)} entries)"