Spaces:
Sleeping
Sleeping
Nyha15 committed on
Commit ·
45ef072
1
Parent(s): ce28d7c
Removed LLM call
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
-
Data Analyst Duo MCP
|
| 3 |
"""
|
| 4 |
|
| 5 |
import os
|
|
@@ -13,12 +13,6 @@ import pandas as pd
|
|
| 13 |
import numpy as np
|
| 14 |
import requests
|
| 15 |
import gradio as gr
|
| 16 |
-
import openai
|
| 17 |
-
|
| 18 |
-
# ─── OpenAI Setup ──────────────────────────────────────
|
| 19 |
-
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 20 |
-
if not openai.api_key:
|
| 21 |
-
raise EnvironmentError("Missing OPENAI_API_KEY environment variable")
|
| 22 |
|
| 23 |
# ─── Logging setup ─────────────────────────────────────
|
| 24 |
logging.basicConfig(
|
|
@@ -37,9 +31,13 @@ class MCPMessage:
|
|
| 37 |
self.timestamp = datetime.datetime.now().isoformat()
|
| 38 |
|
| 39 |
def to_dict(self):
|
| 40 |
-
return {
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
class MCPTool:
|
| 45 |
def __init__(self, name, description, func):
|
|
@@ -105,129 +103,114 @@ class ComputeAgent(MCPAgent):
|
|
| 105 |
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/cereal.csv"
|
| 106 |
try:
|
| 107 |
self.df = pd.read_csv(url)
|
| 108 |
-
return {
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
| 111 |
except Exception as e:
|
| 112 |
logger.exception("Load failed")
|
| 113 |
-
return {"status":"error","message":str(e)}
|
| 114 |
|
| 115 |
def _stats(self, params):
|
| 116 |
if self.df is None:
|
| 117 |
-
return {"status":"error","message":"No data loaded"}
|
| 118 |
try:
|
| 119 |
cols = list(self.df.select_dtypes(include=[np.number]).columns)
|
| 120 |
stats = self.df[cols].describe().to_dict()
|
| 121 |
-
return {"status":"success","statistics":stats}
|
| 122 |
except Exception as e:
|
| 123 |
logger.exception("Stats failed")
|
| 124 |
-
return {"status":"error","message":str(e)}
|
| 125 |
|
| 126 |
def _corr(self, params):
|
| 127 |
if self.df is None:
|
| 128 |
-
return {"status":"error","message":"No data loaded"}
|
| 129 |
try:
|
| 130 |
cols = list(self.df.select_dtypes(include=[np.number]).columns)
|
| 131 |
corr = self.df[cols].corr().to_dict()
|
| 132 |
-
return {"status":"success","correlation_matrix":corr}
|
| 133 |
except Exception as e:
|
| 134 |
logger.exception("Corr failed")
|
| 135 |
-
return {"status":"error","message":str(e)}
|
| 136 |
|
| 137 |
def handle_message(self, m):
|
| 138 |
if m.message_type == "request_data_load":
|
| 139 |
-
res = self._load(m.content)
|
| 140 |
self.send_message(m.sender, "data_load_result", res)
|
| 141 |
elif m.message_type == "request_statistics":
|
| 142 |
-
res = self._stats(m.content)
|
| 143 |
self.send_message(m.sender, "statistics_result", res)
|
| 144 |
elif m.message_type == "request_correlation":
|
| 145 |
-
res = self._corr(m.content)
|
| 146 |
self.send_message(m.sender, "correlation_result", res)
|
| 147 |
|
| 148 |
-
# ─── InterpretAgent with
|
| 149 |
class InterpretAgent(MCPAgent):
|
| 150 |
def __init__(self):
|
| 151 |
-
super().__init__("InterpretAgent","Interprets & reports
|
| 152 |
self.data_info = None
|
| 153 |
self.stats = None
|
| 154 |
self.corr = None
|
| 155 |
-
#
|
| 156 |
-
self.register_tool(MCPTool("interpret_statistics","",self._int_stats))
|
| 157 |
-
self.register_tool(MCPTool("interpret_correlation","",self._int_corr))
|
| 158 |
-
# LLM
|
| 159 |
-
self.register_tool(MCPTool("llm_interpret","
|
| 160 |
-
self.register_tool(MCPTool("llm_report","
|
| 161 |
|
| 162 |
def _int_stats(self, params):
|
| 163 |
-
ins=[]
|
| 164 |
-
for col,vals in self.stats.get("statistics",{}).items():
|
| 165 |
-
if "mean" in vals:
|
|
|
|
| 166 |
if "min" in vals and "max" in vals:
|
| 167 |
ins.append(f"{col} ∈ [{vals['min']:.2f},{vals['max']:.2f}]")
|
| 168 |
-
return {"status":"success","insights":ins[:3],"summary":"Rule-based insights"}
|
| 169 |
|
| 170 |
def _int_corr(self, params):
|
| 171 |
-
return {"status":"success","insights":["Correlation computed"],"summary":"Rule-based corr"}
|
| 172 |
|
| 173 |
def _llm_interpret(self, params):
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
json.dumps(self.stats, indent=2) +
|
| 177 |
-
"\nPlease summarize the top 3 insights in plain English." )
|
| 178 |
-
resp = openai.ChatCompletion.create(
|
| 179 |
-
model="gpt-4",
|
| 180 |
-
messages=[{"role":"user","content":prompt}]
|
| 181 |
-
)
|
| 182 |
-
text = resp.choices[0].message.content.strip()
|
| 183 |
-
lines = [l for l in text.split("\n") if l.strip()]
|
| 184 |
-
return {"status":"success","insights":lines,"summary":text}
|
| 185 |
|
| 186 |
def _llm_report(self, params):
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
f"Dataset info: rows={self.data_info['rows']}, cols={len(self.data_info['columns'])}\n" +
|
| 190 |
-
json.dumps(self.stats, indent=2) + "\n" +
|
| 191 |
-
json.dumps(self.corr, indent=2)
|
| 192 |
-
)
|
| 193 |
-
resp = openai.ChatCompletion.create(
|
| 194 |
-
model="gpt-4",
|
| 195 |
-
messages=[{"role":"user","content":prompt}]
|
| 196 |
-
)
|
| 197 |
-
return {"status":"success","report_md":resp.choices[0].message.content.strip()}
|
| 198 |
|
| 199 |
def handle_message(self, m):
|
| 200 |
if m.message_type == "data_load_result":
|
| 201 |
self.data_info = m.content
|
| 202 |
-
self.send_message(m.sender,"ack",{"status":"loaded"})
|
| 203 |
elif m.message_type == "statistics_result":
|
| 204 |
self.stats = m.content
|
| 205 |
-
# LLM-driven interpretation
|
| 206 |
llm_res = self._llm_interpret({})
|
| 207 |
-
self.send_message(m.sender,"llm_statistics_interpretation",llm_res)
|
| 208 |
elif m.message_type == "correlation_result":
|
| 209 |
self.corr = m.content
|
| 210 |
llm_res = self._llm_interpret({})
|
| 211 |
-
self.send_message(m.sender,"llm_correlation_interpretation",llm_res)
|
| 212 |
elif m.message_type == "request_report":
|
| 213 |
llm_res = self._llm_report({})
|
| 214 |
-
self.send_message(m.sender,"report_result",llm_res)
|
| 215 |
|
| 216 |
-
# ─── Orchestration
|
| 217 |
class DataAnalystDuo:
|
| 218 |
def __init__(self):
|
| 219 |
-
self.C=ComputeAgent()
|
| 220 |
-
self.
|
|
|
|
|
|
|
| 221 |
|
| 222 |
-
def run(self,url):
|
| 223 |
-
self.I.send_message("ComputeAgent","request_data_load",{"url":url})
|
| 224 |
self.C.process(); self.I.process()
|
| 225 |
-
self.I.send_message("ComputeAgent","request_statistics",{})
|
| 226 |
self.C.process(); self.I.process()
|
| 227 |
-
self.I.send_message("ComputeAgent","request_correlation",{})
|
| 228 |
self.C.process(); self.I.process()
|
| 229 |
-
self.C.send_message("InterpretAgent","request_report",{"report_title":"Analysis Report"})
|
| 230 |
self.I.process(); self.C.process()
|
| 231 |
return self.C.get_history(), self.I.get_history()
|
| 232 |
-
|
| 233 |
-
|
|
|
|
| 1 |
"""
|
| 2 |
+
Data Analyst Duo MCP without OpenAI Integration
|
| 3 |
"""
|
| 4 |
|
| 5 |
import os
|
|
|
|
| 13 |
import numpy as np
|
| 14 |
import requests
|
| 15 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# ─── Logging setup ─────────────────────────────────────
|
| 18 |
logging.basicConfig(
|
|
|
|
| 31 |
self.timestamp = datetime.datetime.now().isoformat()
|
| 32 |
|
| 33 |
def to_dict(self):
|
| 34 |
+
return {
|
| 35 |
+
"id": self.id,
|
| 36 |
+
"sender": self.sender,
|
| 37 |
+
"message_type": self.message_type,
|
| 38 |
+
"content": self.content,
|
| 39 |
+
"timestamp": self.timestamp,
|
| 40 |
+
}
|
| 41 |
|
| 42 |
class MCPTool:
|
| 43 |
def __init__(self, name, description, func):
|
|
|
|
| 103 |
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/cereal.csv"
|
| 104 |
try:
|
| 105 |
self.df = pd.read_csv(url)
|
| 106 |
+
return {
|
| 107 |
+
"status": "success",
|
| 108 |
+
"rows": self.df.shape[0],
|
| 109 |
+
"columns": list(self.df.columns),
|
| 110 |
+
"preview": self.df.head(5).to_dict(orient="records")
|
| 111 |
+
}
|
| 112 |
except Exception as e:
|
| 113 |
logger.exception("Load failed")
|
| 114 |
+
return {"status": "error", "message": str(e)}
|
| 115 |
|
| 116 |
def _stats(self, params):
|
| 117 |
if self.df is None:
|
| 118 |
+
return {"status": "error", "message": "No data loaded"}
|
| 119 |
try:
|
| 120 |
cols = list(self.df.select_dtypes(include=[np.number]).columns)
|
| 121 |
stats = self.df[cols].describe().to_dict()
|
| 122 |
+
return {"status": "success", "statistics": stats}
|
| 123 |
except Exception as e:
|
| 124 |
logger.exception("Stats failed")
|
| 125 |
+
return {"status": "error", "message": str(e)}
|
| 126 |
|
| 127 |
def _corr(self, params):
|
| 128 |
if self.df is None:
|
| 129 |
+
return {"status": "error", "message": "No data loaded"}
|
| 130 |
try:
|
| 131 |
cols = list(self.df.select_dtypes(include=[np.number]).columns)
|
| 132 |
corr = self.df[cols].corr().to_dict()
|
| 133 |
+
return {"status": "success", "correlation_matrix": corr}
|
| 134 |
except Exception as e:
|
| 135 |
logger.exception("Corr failed")
|
| 136 |
+
return {"status": "error", "message": str(e)}
|
| 137 |
|
| 138 |
def handle_message(self, m):
|
| 139 |
if m.message_type == "request_data_load":
|
| 140 |
+
res = self._load(m.content)
|
| 141 |
self.send_message(m.sender, "data_load_result", res)
|
| 142 |
elif m.message_type == "request_statistics":
|
| 143 |
+
res = self._stats(m.content)
|
| 144 |
self.send_message(m.sender, "statistics_result", res)
|
| 145 |
elif m.message_type == "request_correlation":
|
| 146 |
+
res = self._corr(m.content)
|
| 147 |
self.send_message(m.sender, "correlation_result", res)
|
| 148 |
|
| 149 |
+
# ─── InterpretAgent with stubs ───────────────────────────────
|
| 150 |
class InterpretAgent(MCPAgent):
|
| 151 |
def __init__(self):
|
| 152 |
+
super().__init__("InterpretAgent", "Interprets & reports (no LLM)")
|
| 153 |
self.data_info = None
|
| 154 |
self.stats = None
|
| 155 |
self.corr = None
|
| 156 |
+
# rule-based tools
|
| 157 |
+
self.register_tool(MCPTool("interpret_statistics", "", self._int_stats))
|
| 158 |
+
self.register_tool(MCPTool("interpret_correlation", "", self._int_corr))
|
| 159 |
+
# LLM stubs
|
| 160 |
+
self.register_tool(MCPTool("llm_interpret", "LLM stub", self._llm_interpret))
|
| 161 |
+
self.register_tool(MCPTool("llm_report", "LLM stub", self._llm_report))
|
| 162 |
|
| 163 |
def _int_stats(self, params):
|
| 164 |
+
ins = []
|
| 165 |
+
for col, vals in self.stats.get("statistics", {}).items():
|
| 166 |
+
if "mean" in vals:
|
| 167 |
+
ins.append(f"{col} avg={vals['mean']:.2f}")
|
| 168 |
if "min" in vals and "max" in vals:
|
| 169 |
ins.append(f"{col} ∈ [{vals['min']:.2f},{vals['max']:.2f}]")
|
| 170 |
+
return {"status": "success", "insights": ins[:3], "summary": "Rule-based insights"}
|
| 171 |
|
| 172 |
def _int_corr(self, params):
|
| 173 |
+
return {"status": "success", "insights": ["Correlation computed"], "summary": "Rule-based corr"}
|
| 174 |
|
| 175 |
def _llm_interpret(self, params):
|
| 176 |
+
# stubbed out; no OpenAI
|
| 177 |
+
return {"status": "skipped", "insights": [], "summary": "LLM removed"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
def _llm_report(self, params):
|
| 180 |
+
# stubbed out; no OpenAI
|
| 181 |
+
return {"status": "skipped", "report_md": ""}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
def handle_message(self, m):
|
| 184 |
if m.message_type == "data_load_result":
|
| 185 |
self.data_info = m.content
|
| 186 |
+
self.send_message(m.sender, "ack", {"status": "loaded"})
|
| 187 |
elif m.message_type == "statistics_result":
|
| 188 |
self.stats = m.content
|
|
|
|
| 189 |
llm_res = self._llm_interpret({})
|
| 190 |
+
self.send_message(m.sender, "llm_statistics_interpretation", llm_res)
|
| 191 |
elif m.message_type == "correlation_result":
|
| 192 |
self.corr = m.content
|
| 193 |
llm_res = self._llm_interpret({})
|
| 194 |
+
self.send_message(m.sender, "llm_correlation_interpretation", llm_res)
|
| 195 |
elif m.message_type == "request_report":
|
| 196 |
llm_res = self._llm_report({})
|
| 197 |
+
self.send_message(m.sender, "report_result", llm_res)
|
| 198 |
|
| 199 |
+
# ─── Orchestration (unchanged) ─────────────────────────────
|
| 200 |
class DataAnalystDuo:
|
| 201 |
def __init__(self):
|
| 202 |
+
self.C = ComputeAgent()
|
| 203 |
+
self.I = InterpretAgent()
|
| 204 |
+
self.C.connect(self.I)
|
| 205 |
+
self.I.connect(self.C)
|
| 206 |
|
| 207 |
+
def run(self, url):
|
| 208 |
+
self.I.send_message("ComputeAgent", "request_data_load", {"url": url})
|
| 209 |
self.C.process(); self.I.process()
|
| 210 |
+
self.I.send_message("ComputeAgent", "request_statistics", {})
|
| 211 |
self.C.process(); self.I.process()
|
| 212 |
+
self.I.send_message("ComputeAgent", "request_correlation", {})
|
| 213 |
self.C.process(); self.I.process()
|
| 214 |
+
self.C.send_message("InterpretAgent", "request_report", {"report_title": "Analysis Report"})
|
| 215 |
self.I.process(); self.C.process()
|
| 216 |
return self.C.get_history(), self.I.get_history()
|
|
|
|
|
|