Nyha15 committed on
Commit
230d1ef
·
1 Parent(s): cd765a0

Removed LLM call

Browse files
Files changed (1) hide show
  1. app.py +54 -61
app.py CHANGED
@@ -1,9 +1,9 @@
1
  """
2
  app.py β€” Data Analyst Duo MCP (no OpenAI) Gradio Space
 
3
  """
4
 
5
  import os
6
- import json
7
  import uuid
8
  import logging
9
  import datetime
@@ -12,14 +12,14 @@ import pandas as pd
12
  import numpy as np
13
  import gradio as gr
14
 
15
- # β€”β€”β€” Logging setup β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
16
  logging.basicConfig(
17
  level=logging.INFO,
18
  format="%(asctime)s %(levelname)s:%(name)s: %(message)s"
19
  )
20
  logger = logging.getLogger("DataAnalystDuo")
21
 
22
- # β€”β€”β€” MCP Core β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
23
  class MCPMessage:
24
  def __init__(self, sender, message_type, content):
25
  self.id = str(uuid.uuid4())
@@ -86,19 +86,20 @@ class MCPAgent:
86
  def get_history(self):
87
  return self.history
88
 
89
- # β€”β€”β€” ComputeAgent β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
90
  class ComputeAgent(MCPAgent):
91
  def __init__(self):
92
  super().__init__("ComputeAgent", "Loads & computes data")
93
  self.df = None
94
- self.register_tool(MCPTool("load_dataset", "Load CSV from URL", self._load))
95
- self.register_tool(MCPTool("compute_statistics", "Descriptive stats", self._stats))
96
- self.register_tool(MCPTool("compute_correlation", "Correlation matrix", self._corr))
97
 
98
  def _load(self, params):
99
  url = params.get("url", "").strip()
100
- if not url or url.lower() == "default":
101
- url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
 
102
  try:
103
  self.df = pd.read_csv(url)
104
  return {
@@ -114,24 +115,16 @@ class ComputeAgent(MCPAgent):
114
  def _stats(self, params):
115
  if self.df is None:
116
  return {"status": "error", "message": "No data loaded"}
117
- try:
118
- cols = list(self.df.select_dtypes(include=[np.number]).columns)
119
- stats = self.df[cols].describe().to_dict()
120
- return {"status": "success", "statistics": stats}
121
- except Exception as e:
122
- logger.exception("Stats failed")
123
- return {"status": "error", "message": str(e)}
124
 
125
  def _corr(self, params):
126
  if self.df is None:
127
  return {"status": "error", "message": "No data loaded"}
128
- try:
129
- cols = list(self.df.select_dtypes(include=[np.number]).columns)
130
- corr = self.df[cols].corr().to_dict()
131
- return {"status": "success", "correlation_matrix": corr}
132
- except Exception as e:
133
- logger.exception("Corr failed")
134
- return {"status": "error", "message": str(e)}
135
 
136
  def handle_message(self, m):
137
  if m.message_type == "request_data_load":
@@ -144,35 +137,26 @@ class ComputeAgent(MCPAgent):
144
  res = self._corr(m.content)
145
  self.send_message(m.sender, "correlation_result", res)
146
 
147
- # β€”β€”β€” InterpretAgent (stubs) β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
148
  class InterpretAgent(MCPAgent):
149
  def __init__(self):
150
- super().__init__("InterpretAgent", "Interprets & reports (no LLM)")
151
  self.data_info = None
152
  self.stats = None
153
  self.corr = None
154
  self.register_tool(MCPTool("interpret_statistics", "", self._int_stats))
155
  self.register_tool(MCPTool("interpret_correlation", "", self._int_corr))
156
- self.register_tool(MCPTool("llm_interpret", "LLM stub", self._llm_interpret))
157
- self.register_tool(MCPTool("llm_report", "LLM stub", self._llm_report))
158
 
159
  def _int_stats(self, params):
160
- ins = []
161
- for col, vals in self.stats.get("statistics", {}).items():
162
- if "mean" in vals:
163
- ins.append(f"{col} avg={vals['mean']:.2f}")
164
- if "min" in vals and "max" in vals:
165
- ins.append(f"{col} ∈ [{vals['min']:.2f},{vals['max']:.2f}]")
166
- return {"status": "success", "insights": ins[:3], "summary": "Rule-based insights"}
167
 
168
  def _int_corr(self, params):
169
- return {"status": "success", "insights": ["Correlation computed"], "summary": "Rule-based corr"}
170
 
171
- def _llm_interpret(self, params):
172
- return {"status": "skipped", "insights": [], "summary": "LLM removed"}
173
-
174
- def _llm_report(self, params):
175
- return {"status": "skipped", "report_md": ""}
176
 
177
  def handle_message(self, m):
178
  if m.message_type == "data_load_result":
@@ -180,17 +164,14 @@ class InterpretAgent(MCPAgent):
180
  self.send_message(m.sender, "ack", {"status": "loaded"})
181
  elif m.message_type == "statistics_result":
182
  self.stats = m.content
183
- llm_res = self._llm_interpret({})
184
- self.send_message(m.sender, "llm_statistics_interpretation", llm_res)
185
  elif m.message_type == "correlation_result":
186
  self.corr = m.content
187
- llm_res = self._llm_interpret({})
188
- self.send_message(m.sender, "llm_correlation_interpretation", llm_res)
189
  elif m.message_type == "request_report":
190
- llm_res = self._llm_report({})
191
- self.send_message(m.sender, "report_result", llm_res)
192
 
193
- # β€”β€”β€” Orchestration β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
194
  class DataAnalystDuo:
195
  def __init__(self):
196
  self.C = ComputeAgent()
@@ -199,41 +180,53 @@ class DataAnalystDuo:
199
  self.I.connect(self.C)
200
 
201
  def run(self, url):
202
- # 1) load data
203
  self.I.send_message("ComputeAgent", "request_data_load", {"url": url})
204
  self.C.process(); self.I.process()
205
  # 2) stats
206
  self.I.send_message("ComputeAgent", "request_statistics", {})
207
  self.C.process(); self.I.process()
208
- # 3) correlation
209
  self.I.send_message("ComputeAgent", "request_correlation", {})
210
  self.C.process(); self.I.process()
211
  # 4) report
212
- self.C.send_message("InterpretAgent", "request_report", {"report_title": "Analysis Report"})
213
  self.I.process(); self.C.process()
214
- # return two separate objects
215
- return self.C.get_history(), self.I.get_history()
216
 
217
- # β€”β€”β€” Gradio app entrypoint β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  def run_analysis(url: str):
219
- compute_hist, interpret_hist = DataAnalystDuo().run(url)
220
- return compute_hist, interpret_hist
221
 
222
  demo = gr.Interface(
223
  fn=run_analysis,
224
- inputs=gr.Textbox(label="CSV URL", placeholder="https://..."),
225
  outputs=[
226
- gr.JSON(label="Compute & Data-Load History"),
227
- gr.JSON(label="Interpret & Report History"),
 
 
 
228
  ],
229
  title="Data Analyst Duo",
230
- description="Load a CSV URL and see compute + interpretation steps"
231
  )
232
 
233
  if __name__ == "__main__":
234
- port = int(os.environ.get("PORT", 7860))
235
  demo.launch(
236
  server_name="0.0.0.0",
237
- server_port=port,
238
  share=True
239
  )
 
1
  """
2
  app.py — Data Analyst Duo MCP (no OpenAI) Gradio Space
3
+ Shows preview table, stats, corr, plus full JSON histories.
4
  """
5
 
6
  import os
 
7
  import uuid
8
  import logging
9
  import datetime
 
12
  import numpy as np
13
  import gradio as gr
14
 
15
+ # ——— Logging ————————————————————————————————————————
16
  logging.basicConfig(
17
  level=logging.INFO,
18
  format="%(asctime)s %(levelname)s:%(name)s: %(message)s"
19
  )
20
  logger = logging.getLogger("DataAnalystDuo")
21
 
22
+ # ——— MCP Core ————————————————————————————————————————
23
  class MCPMessage:
24
  def __init__(self, sender, message_type, content):
25
  self.id = str(uuid.uuid4())
 
86
  def get_history(self):
87
  return self.history
88
 
89
+ # ——— ComputeAgent ————————————————————————————————————————
90
  class ComputeAgent(MCPAgent):
91
  def __init__(self):
92
  super().__init__("ComputeAgent", "Loads & computes data")
93
  self.df = None
94
+ self.register_tool(MCPTool("load_dataset", "Load CSV", self._load))
95
+ self.register_tool(MCPTool("compute_statistics", "Stats", self._stats))
96
+ self.register_tool(MCPTool("compute_correlation", "Corr", self._corr))
97
 
98
  def _load(self, params):
99
  url = params.get("url", "").strip()
100
+ if not url:
101
+ # default to diamonds dataset
102
+ url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv"
103
  try:
104
  self.df = pd.read_csv(url)
105
  return {
 
115
  def _stats(self, params):
116
  if self.df is None:
117
  return {"status": "error", "message": "No data loaded"}
118
+ cols = self.df.select_dtypes(include=[np.number]).columns
119
+ stats = self.df[cols].describe().to_dict()
120
+ return {"status": "success", "statistics": stats}
 
 
 
 
121
 
122
  def _corr(self, params):
123
  if self.df is None:
124
  return {"status": "error", "message": "No data loaded"}
125
+ cols = self.df.select_dtypes(include=[np.number]).columns
126
+ corr = self.df[cols].corr().to_dict()
127
+ return {"status": "success", "correlation_matrix": corr}
 
 
 
 
128
 
129
  def handle_message(self, m):
130
  if m.message_type == "request_data_load":
 
137
  res = self._corr(m.content)
138
  self.send_message(m.sender, "correlation_result", res)
139
 
140
+ # ——— InterpretAgent (stubs) ————————————————————————————————————————
141
  class InterpretAgent(MCPAgent):
142
  def __init__(self):
143
+ super().__init__("InterpretAgent", "Stubbed interp")
144
  self.data_info = None
145
  self.stats = None
146
  self.corr = None
147
  self.register_tool(MCPTool("interpret_statistics", "", self._int_stats))
148
  self.register_tool(MCPTool("interpret_correlation", "", self._int_corr))
149
+ self.register_tool(MCPTool("llm_interpret", "stub", self._llm_stub))
150
+ self.register_tool(MCPTool("llm_report", "stub", self._llm_stub))
151
 
152
  def _int_stats(self, params):
153
+ return {"status": "skipped", "insights": []}
 
 
 
 
 
 
154
 
155
  def _int_corr(self, params):
156
+ return {"status": "skipped", "insights": []}
157
 
158
+ def _llm_stub(self, params):
159
+ return {"status": "skipped"}
 
 
 
160
 
161
  def handle_message(self, m):
162
  if m.message_type == "data_load_result":
 
164
  self.send_message(m.sender, "ack", {"status": "loaded"})
165
  elif m.message_type == "statistics_result":
166
  self.stats = m.content
167
+ self.send_message(m.sender, "llm_statistics_interpretation", {"status": "skipped"})
 
168
  elif m.message_type == "correlation_result":
169
  self.corr = m.content
170
+ self.send_message(m.sender, "llm_correlation_interpretation", {"status": "skipped"})
 
171
  elif m.message_type == "request_report":
172
+ self.send_message(m.sender, "report_result", {"status": "skipped"})
 
173
 
174
+ # ——— Orchestration ————————————————————————————————————————
175
  class DataAnalystDuo:
176
  def __init__(self):
177
  self.C = ComputeAgent()
 
180
  self.I.connect(self.C)
181
 
182
  def run(self, url):
183
+ # 1) load
184
  self.I.send_message("ComputeAgent", "request_data_load", {"url": url})
185
  self.C.process(); self.I.process()
186
  # 2) stats
187
  self.I.send_message("ComputeAgent", "request_statistics", {})
188
  self.C.process(); self.I.process()
189
+ # 3) corr
190
  self.I.send_message("ComputeAgent", "request_correlation", {})
191
  self.C.process(); self.I.process()
192
  # 4) report
193
+ self.C.send_message("InterpretAgent", "request_report", {})
194
  self.I.process(); self.C.process()
 
 
195
 
196
+ hist_c = self.C.get_history()
197
+ hist_i = self.I.get_history()
198
+
199
+ # extract results
200
+ load = next(m["message"]["content"] for m in hist_c if m["message"]["message_type"]=="data_load_result")
201
+ stats = next(m["message"]["content"] for m in hist_c if m["message"]["message_type"]=="statistics_result")
202
+ corr = next(m["message"]["content"] for m in hist_c if m["message"]["message_type"]=="correlation_result")
203
+
204
+ # build preview DataFrame
205
+ preview_df = pd.DataFrame(load.get("preview", []))
206
+
207
+ return preview_df, stats, corr, hist_c, hist_i
208
+
209
+ # ——— Gradio app ————————————————————————————————————————
210
def run_analysis(url: str):
    """Gradio callback: run the two-agent pipeline against the given CSV URL."""
    duo = DataAnalystDuo()
    return duo.run(url)
 
212
 
213
# Gradio UI definition: one URL textbox in, five output panels — these must
# stay in the same order as the 5-tuple returned by run_analysis.
demo = gr.Interface(
    fn=run_analysis,
    inputs=[gr.Textbox(label="CSV URL", placeholder="https://...")],
    outputs=[
        gr.Dataframe(label="Preview (first 5 rows)"),
        gr.JSON(label="Statistics"),
        gr.JSON(label="Correlation Matrix"),
        gr.JSON(label="Compute History"),
        gr.JSON(label="Interpret History"),
    ],
    title="Data Analyst Duo",
    description="Paste any CSV URL (e.g. diamonds.csv) to see data + stats + agent history"
)
226
 
227
# Script entrypoint: bind on all interfaces so the Space/container can route
# traffic; honor the platform's PORT env var (default 7860, Gradio's standard).
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
        share=True
    )