File size: 3,531 Bytes
9eecab5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import re

from utils.logger import logger


class DataFrameAgent:

    def __init__(self, registry):
        self.registry = registry


    def _detect_dataset(self, query, datasets):
        """
        Detect dataset name from query.
        Falls back to first dataset if none mentioned.
        """
        q = query.lower()

        for d in datasets:
            if d.lower() in q:
                return d

        logger.info("Dataset not specified, using default dataset.")
        return datasets[0]


    def _detect_column(self, query, columns):
        """
        Detect column name from query.
        """
        q = query.lower()

        for col in columns:
            if col.lower() in q:
                return col

        return None


    def _detect_number(self, query, default=5):
        """
        Extract number from query (used for top N rows).
        """
        words = query.split()

        for w in words:
            if w.isdigit():
                return int(w)

        return default


    def handle(self, query):

        q = query.lower()

        try:

            datasets = self.registry.list_datasets()

            if not datasets:
                logger.warning("DataFrameAgent called with no datasets loaded.")
                return "No datasets available."

            dataset = self._detect_dataset(q, datasets)

            df = self.registry.load_dataframe(dataset)

            columns = df.columns.tolist()

        except Exception as e:
            logger.error(f"Failed loading dataset in DataFrameAgent | {e}")
            return "Failed to load dataset."


        try:

            # -------- SHOW ROWS --------
            if "top" in q or "first" in q:

                n = self._detect_number(q, default=5)

                logger.info(f"Showing first {n} rows from {dataset}")

                return df.head(n)


            # -------- ROW COUNT --------
            if "how many rows" in q or "row count" in q or "count rows" in q:

                logger.info(f"Row count requested for {dataset}")

                return f"{dataset} has {len(df)} rows."


            # -------- COLUMN DETECTION --------
            column = self._detect_column(q, columns)

            if column is None and any(
                word in q for word in ["average", "mean", "max", "min", "highest", "lowest"]
            ):
                logger.warning("Column not detected for dataframe operation.")
                return "Column not found in dataset."


            # -------- MEAN / AVERAGE --------
            if "average" in q or "mean" in q:

                result = df[column].mean()

                logger.info(f"Mean computed for {column} in {dataset}")

                return f"Average {column} in {dataset}: {round(result, 2)}"


            # -------- MAX --------
            if "max" in q or "highest" in q:

                result = df[column].max()

                logger.info(f"Max computed for {column} in {dataset}")

                return f"Max {column} in {dataset}: {result}"


            # -------- MIN --------
            if "min" in q or "lowest" in q:

                result = df[column].min()

                logger.info(f"Min computed for {column} in {dataset}")

                return f"Min {column} in {dataset}: {result}"


            return "DataFrame query not understood."


        except Exception as e:

            logger.error(f"DataFrame operation failed | Query: {query} | Error: {e}")

            return "DataFrame agent error."