from utils.logger import logger
class DataFrameAgent:
    """Answer simple keyword-based questions about datasets held in a registry.

    The registry passed to the constructor must provide ``list_datasets()``
    and ``load_dataframe(name)``. The loaded object is used through
    ``columns.tolist()``, ``head()``, ``len()`` and column indexing
    (presumably a pandas DataFrame -- confirm against the registry).
    """

    # Punctuation stripped from both ends of a token before it is tested as a
    # number, so that "5," or "(10)" are still recognised.
    _TOKEN_PUNCTUATION = ".,;:!?\"'()[]{}"
    # Whole words that request the first rows of the dataset.
    _HEAD_KEYWORDS = frozenset({"top", "first"})
    # Substrings that request an aggregate over a single column.
    _AGGREGATE_KEYWORDS = ("average", "mean", "max", "min", "highest", "lowest")

    def __init__(self, registry):
        self.registry = registry

    @staticmethod
    def _longest_mention(query, names):
        """Return the longest entry of *names* whose text occurs in *query*.

        Matching is case-insensitive. Names are compared through ``str()`` so
        non-string labels do not raise. The longest match wins so that a
        short name ("sales") cannot shadow a longer one that contains it
        ("sales_total"); ties keep the earliest entry. Returns ``None`` when
        nothing matches.
        """
        lowered = query.lower()
        mentioned = [
            name for name in names
            if str(name) and str(name).lower() in lowered
        ]
        if not mentioned:
            return None
        return max(mentioned, key=lambda name: len(str(name)))

    @staticmethod
    def _words(text):
        """Return the set of lowercase alphanumeric words found in *text*.

        Every non-alphanumeric character acts as a separator, so "Top-5"
        yields ``{"top", "5"}`` while "laptop" stays a single word.
        """
        cleaned = "".join(ch if ch.isalnum() else " " for ch in text.lower())
        return set(cleaned.split())

    def _detect_dataset(self, query, datasets):
        """
        Detect dataset name from query.
        Falls back to first dataset if none mentioned.

        Raises IndexError if *datasets* is empty and nothing matches.
        """
        found = self._longest_mention(query, datasets)
        if found is not None:
            return found
        logger.info("Dataset not specified, using default dataset.")
        return datasets[0]

    def _detect_column(self, query, columns):
        """
        Detect column name from query.

        Returns the matching entry of *columns* unchanged, or None when no
        column name appears in the query.
        """
        return self._longest_mention(query, columns)

    def _detect_number(self, query, default=5):
        """
        Extract number from query (used for top N rows).

        Returns the first whitespace-separated token that is a plain decimal
        integer once surrounding punctuation is removed, else *default*.
        """
        for token in query.split():
            candidate = token.strip(self._TOKEN_PUNCTUATION)
            # isdecimal() (unlike isdigit()) only accepts characters that
            # int() can parse, so tokens such as "²" are skipped, not raised.
            if candidate.isdecimal():
                return int(candidate)
        return default

    def handle(self, query):
        """Answer *query* against the dataset it mentions (or the first one).

        Supported requests: first N rows ("top" / "first"), row count, and
        mean / max / min of a column named in the query.

        Returns the result of ``df.head(n)`` for row requests and a message
        string in every other case, including failures, which are logged and
        reported as a short error string rather than raised.
        """
        q = query.lower()

        try:
            datasets = self.registry.list_datasets()
            if not datasets:
                logger.warning("DataFrameAgent called with no datasets loaded.")
                return "No datasets available."
            dataset = self._detect_dataset(q, datasets)
            df = self.registry.load_dataframe(dataset)
            columns = df.columns.tolist()
        except Exception as e:
            logger.error(f"Failed loading dataset in DataFrameAgent | {e}")
            return "Failed to load dataset."

        try:
            # -------- SHOW ROWS --------
            # Whole-word match: a plain substring test would also fire on
            # words such as "laptop" or "stop".
            if self._words(q) & self._HEAD_KEYWORDS:
                n = self._detect_number(q, default=5)
                logger.info(f"Showing first {n} rows from {dataset}")
                return df.head(n)

            # -------- ROW COUNT --------
            if "how many rows" in q or "row count" in q or "count rows" in q:
                logger.info(f"Row count requested for {dataset}")
                return f"{dataset} has {len(df)} rows."

            # -------- COLUMN DETECTION --------
            column = self._detect_column(q, columns)
            if column is None and any(
                word in q for word in self._AGGREGATE_KEYWORDS
            ):
                logger.warning("Column not detected for dataframe operation.")
                return "Column not found in dataset."

            # -------- MEAN / AVERAGE --------
            if "average" in q or "mean" in q:
                result = df[column].mean()
                logger.info(f"Mean computed for {column} in {dataset}")
                return f"Average {column} in {dataset}: {round(result, 2)}"

            # -------- MAX --------
            if "max" in q or "highest" in q:
                result = df[column].max()
                logger.info(f"Max computed for {column} in {dataset}")
                return f"Max {column} in {dataset}: {result}"

            # -------- MIN --------
            if "min" in q or "lowest" in q:
                result = df[column].min()
                logger.info(f"Min computed for {column} in {dataset}")
                return f"Min {column} in {dataset}: {result}"

            return "DataFrame query not understood."
        except Exception as e:
            logger.error(f"DataFrame operation failed | Query: {query} | Error: {e}")
            return "DataFrame agent error."