Spaces:
Sleeping
Sleeping
Nyha15 committed on
Commit ·
230d1ef
1
Parent(s): cd765a0
Removed LLM call
Browse files
app.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
"""
|
| 2 |
app.py — Data Analyst Duo MCP (no OpenAI) Gradio Space
|
|
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
import os
|
| 6 |
-
import json
|
| 7 |
import uuid
|
| 8 |
import logging
|
| 9 |
import datetime
|
|
@@ -12,14 +12,14 @@ import pandas as pd
|
|
| 12 |
import numpy as np
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
-
# ─── Logging
|
| 16 |
logging.basicConfig(
|
| 17 |
level=logging.INFO,
|
| 18 |
format="%(asctime)s %(levelname)s:%(name)s: %(message)s"
|
| 19 |
)
|
| 20 |
logger = logging.getLogger("DataAnalystDuo")
|
| 21 |
|
| 22 |
-
# ─── MCP Core ─────────────────────────────────────
|
| 23 |
class MCPMessage:
|
| 24 |
def __init__(self, sender, message_type, content):
|
| 25 |
self.id = str(uuid.uuid4())
|
|
@@ -86,19 +86,20 @@ class MCPAgent:
|
|
| 86 |
def get_history(self):
|
| 87 |
return self.history
|
| 88 |
|
| 89 |
-
# ─── ComputeAgent ─────────────────────────────────────
|
| 90 |
class ComputeAgent(MCPAgent):
|
| 91 |
def __init__(self):
|
| 92 |
super().__init__("ComputeAgent", "Loads & computes data")
|
| 93 |
self.df = None
|
| 94 |
-
self.register_tool(MCPTool("load_dataset", "Load CSV
|
| 95 |
-
self.register_tool(MCPTool("compute_statistics", "
|
| 96 |
-
self.register_tool(MCPTool("compute_correlation", "
|
| 97 |
|
| 98 |
def _load(self, params):
|
| 99 |
url = params.get("url", "").strip()
|
| 100 |
-
if not url
|
| 101 |
-
|
|
|
|
| 102 |
try:
|
| 103 |
self.df = pd.read_csv(url)
|
| 104 |
return {
|
|
@@ -114,24 +115,16 @@ class ComputeAgent(MCPAgent):
|
|
| 114 |
def _stats(self, params):
|
| 115 |
if self.df is None:
|
| 116 |
return {"status": "error", "message": "No data loaded"}
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
return {"status": "success", "statistics": stats}
|
| 121 |
-
except Exception as e:
|
| 122 |
-
logger.exception("Stats failed")
|
| 123 |
-
return {"status": "error", "message": str(e)}
|
| 124 |
|
| 125 |
def _corr(self, params):
|
| 126 |
if self.df is None:
|
| 127 |
return {"status": "error", "message": "No data loaded"}
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
return {"status": "success", "correlation_matrix": corr}
|
| 132 |
-
except Exception as e:
|
| 133 |
-
logger.exception("Corr failed")
|
| 134 |
-
return {"status": "error", "message": str(e)}
|
| 135 |
|
| 136 |
def handle_message(self, m):
|
| 137 |
if m.message_type == "request_data_load":
|
|
@@ -144,35 +137,26 @@ class ComputeAgent(MCPAgent):
|
|
| 144 |
res = self._corr(m.content)
|
| 145 |
self.send_message(m.sender, "correlation_result", res)
|
| 146 |
|
| 147 |
-
# ─── InterpretAgent (stubs) ─────────────────────────────────
|
| 148 |
class InterpretAgent(MCPAgent):
|
| 149 |
def __init__(self):
|
| 150 |
-
super().__init__("InterpretAgent", "
|
| 151 |
self.data_info = None
|
| 152 |
self.stats = None
|
| 153 |
self.corr = None
|
| 154 |
self.register_tool(MCPTool("interpret_statistics", "", self._int_stats))
|
| 155 |
self.register_tool(MCPTool("interpret_correlation", "", self._int_corr))
|
| 156 |
-
self.register_tool(MCPTool("llm_interpret", "
|
| 157 |
-
self.register_tool(MCPTool("llm_report", "
|
| 158 |
|
| 159 |
def _int_stats(self, params):
|
| 160 |
-
|
| 161 |
-
for col, vals in self.stats.get("statistics", {}).items():
|
| 162 |
-
if "mean" in vals:
|
| 163 |
-
ins.append(f"{col} avg={vals['mean']:.2f}")
|
| 164 |
-
if "min" in vals and "max" in vals:
|
| 165 |
-
ins.append(f"{col} ∈ [{vals['min']:.2f},{vals['max']:.2f}]")
|
| 166 |
-
return {"status": "success", "insights": ins[:3], "summary": "Rule-based insights"}
|
| 167 |
|
| 168 |
def _int_corr(self, params):
|
| 169 |
-
return {"status": "
|
| 170 |
|
| 171 |
-
def
|
| 172 |
-
return {"status": "skipped"
|
| 173 |
-
|
| 174 |
-
def _llm_report(self, params):
|
| 175 |
-
return {"status": "skipped", "report_md": ""}
|
| 176 |
|
| 177 |
def handle_message(self, m):
|
| 178 |
if m.message_type == "data_load_result":
|
|
@@ -180,17 +164,14 @@ class InterpretAgent(MCPAgent):
|
|
| 180 |
self.send_message(m.sender, "ack", {"status": "loaded"})
|
| 181 |
elif m.message_type == "statistics_result":
|
| 182 |
self.stats = m.content
|
| 183 |
-
|
| 184 |
-
self.send_message(m.sender, "llm_statistics_interpretation", llm_res)
|
| 185 |
elif m.message_type == "correlation_result":
|
| 186 |
self.corr = m.content
|
| 187 |
-
|
| 188 |
-
self.send_message(m.sender, "llm_correlation_interpretation", llm_res)
|
| 189 |
elif m.message_type == "request_report":
|
| 190 |
-
|
| 191 |
-
self.send_message(m.sender, "report_result", llm_res)
|
| 192 |
|
| 193 |
-
# ─── Orchestration ─────────────────────────────────────
|
| 194 |
class DataAnalystDuo:
|
| 195 |
def __init__(self):
|
| 196 |
self.C = ComputeAgent()
|
|
@@ -199,41 +180,53 @@ class DataAnalystDuo:
|
|
| 199 |
self.I.connect(self.C)
|
| 200 |
|
| 201 |
def run(self, url):
|
| 202 |
-
# 1) load
|
| 203 |
self.I.send_message("ComputeAgent", "request_data_load", {"url": url})
|
| 204 |
self.C.process(); self.I.process()
|
| 205 |
# 2) stats
|
| 206 |
self.I.send_message("ComputeAgent", "request_statistics", {})
|
| 207 |
self.C.process(); self.I.process()
|
| 208 |
-
# 3)
|
| 209 |
self.I.send_message("ComputeAgent", "request_correlation", {})
|
| 210 |
self.C.process(); self.I.process()
|
| 211 |
# 4) report
|
| 212 |
-
self.C.send_message("InterpretAgent", "request_report", {
|
| 213 |
self.I.process(); self.C.process()
|
| 214 |
-
# return two separate objects
|
| 215 |
-
return self.C.get_history(), self.I.get_history()
|
| 216 |
|
| 217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
def run_analysis(url: str):
|
| 219 |
-
|
| 220 |
-
return compute_hist, interpret_hist
|
| 221 |
|
| 222 |
demo = gr.Interface(
|
| 223 |
fn=run_analysis,
|
| 224 |
-
inputs=gr.Textbox(label="CSV URL", placeholder="https://..."),
|
| 225 |
outputs=[
|
| 226 |
-
gr.
|
| 227 |
-
gr.JSON(label="
|
|
|
|
|
|
|
|
|
|
| 228 |
],
|
| 229 |
title="Data Analyst Duo",
|
| 230 |
-
description="
|
| 231 |
)
|
| 232 |
|
| 233 |
if __name__ == "__main__":
|
| 234 |
-
port = int(os.environ.get("PORT", 7860))
|
| 235 |
demo.launch(
|
| 236 |
server_name="0.0.0.0",
|
| 237 |
-
server_port=
|
| 238 |
share=True
|
| 239 |
)
|
|
|
|
| 1 |
"""
|
| 2 |
app.py — Data Analyst Duo MCP (no OpenAI) Gradio Space
|
| 3 |
+
Shows preview table, stats, corr, plus full JSON histories.
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
|
|
| 7 |
import uuid
|
| 8 |
import logging
|
| 9 |
import datetime
|
|
|
|
| 12 |
import numpy as np
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
+
# ─── Logging ──────────────────────────────────────────────
|
| 16 |
logging.basicConfig(
|
| 17 |
level=logging.INFO,
|
| 18 |
format="%(asctime)s %(levelname)s:%(name)s: %(message)s"
|
| 19 |
)
|
| 20 |
logger = logging.getLogger("DataAnalystDuo")
|
| 21 |
|
| 22 |
+
# ─── MCP Core ──────────────────────────────────────────────
|
| 23 |
class MCPMessage:
|
| 24 |
def __init__(self, sender, message_type, content):
|
| 25 |
self.id = str(uuid.uuid4())
|
|
|
|
| 86 |
def get_history(self):
|
| 87 |
return self.history
|
| 88 |
|
| 89 |
+
# ─── ComputeAgent ──────────────────────────────────────────────
|
| 90 |
class ComputeAgent(MCPAgent):
|
| 91 |
def __init__(self):
|
| 92 |
super().__init__("ComputeAgent", "Loads & computes data")
|
| 93 |
self.df = None
|
| 94 |
+
self.register_tool(MCPTool("load_dataset", "Load CSV", self._load))
|
| 95 |
+
self.register_tool(MCPTool("compute_statistics", "Stats", self._stats))
|
| 96 |
+
self.register_tool(MCPTool("compute_correlation", "Corr", self._corr))
|
| 97 |
|
| 98 |
def _load(self, params):
|
| 99 |
url = params.get("url", "").strip()
|
| 100 |
+
if not url:
|
| 101 |
+
# default to diamonds dataset
|
| 102 |
+
url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv"
|
| 103 |
try:
|
| 104 |
self.df = pd.read_csv(url)
|
| 105 |
return {
|
|
|
|
| 115 |
def _stats(self, params):
|
| 116 |
if self.df is None:
|
| 117 |
return {"status": "error", "message": "No data loaded"}
|
| 118 |
+
cols = self.df.select_dtypes(include=[np.number]).columns
|
| 119 |
+
stats = self.df[cols].describe().to_dict()
|
| 120 |
+
return {"status": "success", "statistics": stats}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
def _corr(self, params):
|
| 123 |
if self.df is None:
|
| 124 |
return {"status": "error", "message": "No data loaded"}
|
| 125 |
+
cols = self.df.select_dtypes(include=[np.number]).columns
|
| 126 |
+
corr = self.df[cols].corr().to_dict()
|
| 127 |
+
return {"status": "success", "correlation_matrix": corr}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
def handle_message(self, m):
|
| 130 |
if m.message_type == "request_data_load":
|
|
|
|
| 137 |
res = self._corr(m.content)
|
| 138 |
self.send_message(m.sender, "correlation_result", res)
|
| 139 |
|
| 140 |
+
# ─── InterpretAgent (stubs) ──────────────────────────────────────
|
| 141 |
class InterpretAgent(MCPAgent):
|
| 142 |
def __init__(self):
|
| 143 |
+
super().__init__("InterpretAgent", "Stubbed interp")
|
| 144 |
self.data_info = None
|
| 145 |
self.stats = None
|
| 146 |
self.corr = None
|
| 147 |
self.register_tool(MCPTool("interpret_statistics", "", self._int_stats))
|
| 148 |
self.register_tool(MCPTool("interpret_correlation", "", self._int_corr))
|
| 149 |
+
self.register_tool(MCPTool("llm_interpret", "stub", self._llm_stub))
|
| 150 |
+
self.register_tool(MCPTool("llm_report", "stub", self._llm_stub))
|
| 151 |
|
| 152 |
def _int_stats(self, params):
|
| 153 |
+
return {"status": "skipped", "insights": []}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
def _int_corr(self, params):
|
| 156 |
+
return {"status": "skipped", "insights": []}
|
| 157 |
|
| 158 |
+
def _llm_stub(self, params):
|
| 159 |
+
return {"status": "skipped"}
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
def handle_message(self, m):
|
| 162 |
if m.message_type == "data_load_result":
|
|
|
|
| 164 |
self.send_message(m.sender, "ack", {"status": "loaded"})
|
| 165 |
elif m.message_type == "statistics_result":
|
| 166 |
self.stats = m.content
|
| 167 |
+
self.send_message(m.sender, "llm_statistics_interpretation", {"status": "skipped"})
|
|
|
|
| 168 |
elif m.message_type == "correlation_result":
|
| 169 |
self.corr = m.content
|
| 170 |
+
self.send_message(m.sender, "llm_correlation_interpretation", {"status": "skipped"})
|
|
|
|
| 171 |
elif m.message_type == "request_report":
|
| 172 |
+
self.send_message(m.sender, "report_result", {"status": "skipped"})
|
|
|
|
| 173 |
|
| 174 |
+
# ─── Orchestration ──────────────────────────────────────────
|
| 175 |
class DataAnalystDuo:
|
| 176 |
def __init__(self):
|
| 177 |
self.C = ComputeAgent()
|
|
|
|
| 180 |
self.I.connect(self.C)
|
| 181 |
|
| 182 |
def run(self, url):
|
| 183 |
+
# 1) load
|
| 184 |
self.I.send_message("ComputeAgent", "request_data_load", {"url": url})
|
| 185 |
self.C.process(); self.I.process()
|
| 186 |
# 2) stats
|
| 187 |
self.I.send_message("ComputeAgent", "request_statistics", {})
|
| 188 |
self.C.process(); self.I.process()
|
| 189 |
+
# 3) corr
|
| 190 |
self.I.send_message("ComputeAgent", "request_correlation", {})
|
| 191 |
self.C.process(); self.I.process()
|
| 192 |
# 4) report
|
| 193 |
+
self.C.send_message("InterpretAgent", "request_report", {})
|
| 194 |
self.I.process(); self.C.process()
|
|
|
|
|
|
|
| 195 |
|
| 196 |
+
hist_c = self.C.get_history()
|
| 197 |
+
hist_i = self.I.get_history()
|
| 198 |
+
|
| 199 |
+
# extract results
|
| 200 |
+
load = next(m["message"]["content"] for m in hist_c if m["message"]["message_type"]=="data_load_result")
|
| 201 |
+
stats = next(m["message"]["content"] for m in hist_c if m["message"]["message_type"]=="statistics_result")
|
| 202 |
+
corr = next(m["message"]["content"] for m in hist_c if m["message"]["message_type"]=="correlation_result")
|
| 203 |
+
|
| 204 |
+
# build preview DataFrame
|
| 205 |
+
preview_df = pd.DataFrame(load.get("preview", []))
|
| 206 |
+
|
| 207 |
+
return preview_df, stats, corr, hist_c, hist_i
|
| 208 |
+
|
| 209 |
+
# ─── Gradio app ─────────────────────────────────────────────
|
| 210 |
def run_analysis(url: str):
|
| 211 |
+
return DataAnalystDuo().run(url)
|
|
|
|
| 212 |
|
| 213 |
demo = gr.Interface(
|
| 214 |
fn=run_analysis,
|
| 215 |
+
inputs=[gr.Textbox(label="CSV URL", placeholder="https://...")],
|
| 216 |
outputs=[
|
| 217 |
+
gr.Dataframe(label="Preview (first 5 rows)"),
|
| 218 |
+
gr.JSON(label="Statistics"),
|
| 219 |
+
gr.JSON(label="Correlation Matrix"),
|
| 220 |
+
gr.JSON(label="Compute History"),
|
| 221 |
+
gr.JSON(label="Interpret History"),
|
| 222 |
],
|
| 223 |
title="Data Analyst Duo",
|
| 224 |
+
description="Paste any CSV URL (e.g. diamonds.csv) to see data + stats + agent history"
|
| 225 |
)
|
| 226 |
|
| 227 |
if __name__ == "__main__":
|
|
|
|
| 228 |
demo.launch(
|
| 229 |
server_name="0.0.0.0",
|
| 230 |
+
server_port=int(os.environ.get("PORT", 7860)),
|
| 231 |
share=True
|
| 232 |
)
|