"""
app.py
======
Streamlit UI — Data Analyst Agent (LangChain + Gemini)
Run: streamlit run app.py
"""
import os
import io
import json
import streamlit as st
import pandas as pd
import plotly.express as px
from core_agent import (
get_llm, load_file, profile_dataframe, profile_to_text,
set_dataframe, build_agent, run_agent,
auto_suggest_charts, make_plotly_chart, recommend_chart
)
# ─── Page Config ──────────────────────────────────────────────────────────────
# Browser-tab title and icon, wide layout, sidebar expanded on first load.
page_settings = {
    "page_title": "DataMind Agent",
    "page_icon": "🧠",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)
# ─── Custom CSS ───────────────────────────────────────────────────────────────
# Custom stylesheet hook, injected as raw HTML. The payload is currently a
# lone newline, so nothing is styled yet.
custom_css = """
"""
st.markdown(custom_css, unsafe_allow_html=True)
# ─── Session State ────────────────────────────────────────────────────────────
# Seed the per-session keys the rest of the script reads. Keys that already
# exist (set on an earlier rerun) are left untouched.
session_defaults = {
    "df": None,
    "profile": None,
    "file_type": None,
    "chat_history": [],
    "llm": None,
    "agent_executor": None,
    "api_key_set": False,
}
for state_key, initial_value in session_defaults.items():
    if state_key in st.session_state:
        continue
    st.session_state[state_key] = initial_value
# ─── Sidebar ──────────────────────────────────────────────────────────────────
with st.sidebar:
    st.markdown("### 🧠 DataMind Agent")
    st.markdown("---")

    # API key: (re)build the LLM and agent only when nothing is connected yet
    # or the entered key differs from the last key that connected.
    st.markdown("**🔑 Gemini API Key**")
    api_key = st.text_input(
        "Enter your key",
        type="password",
        placeholder="AIza...",
        help="Get free key at aistudio.google.com",
        label_visibility="collapsed",
    )
    if api_key:
        key_changed = st.session_state.get("_last_key") != api_key
        if not st.session_state.api_key_set or key_changed:
            try:
                st.session_state.llm = get_llm(api_key)
                st.session_state.agent_executor = build_agent(st.session_state.llm)
                st.session_state.api_key_set = True
                st.session_state["_last_key"] = api_key
                st.success("✅ Connected to Gemini!")
            except Exception as e:
                st.error(f"❌ Invalid key: {e}")
    st.markdown("---")

    # File upload
    st.markdown("**📁 Upload Data File**")
    uploaded = st.file_uploader(
        "Upload",
        type=["csv", "xlsx", "xls", "json"],
        label_visibility="collapsed",
    )
    if uploaded and st.session_state.api_key_set:
        # The script reruns on every interaction while the same file stays in
        # the uploader. Reloading each time would re-parse the file and wipe
        # chat_history on every click, so load only when the upload changes.
        # file_id is included when the installed Streamlit exposes it.
        upload_id = (getattr(uploaded, "file_id", None), uploaded.name, uploaded.size)
        already_loaded = (
            st.session_state.df is not None
            and st.session_state.get("_loaded_upload") == upload_id
        )
        if not already_loaded:
            with st.spinner("📊 Analyzing your data..."):
                try:
                    df, ftype = load_file(uploaded)
                    profile = profile_dataframe(df)
                    st.session_state.df = df
                    st.session_state.file_type = ftype
                    st.session_state.profile = profile
                    # A new dataset invalidates the previous conversation.
                    st.session_state.chat_history = []
                    set_dataframe(df, profile)
                    # Recorded only after a successful load so that a failed
                    # load is retried on the next rerun.
                    st.session_state["_loaded_upload"] = upload_id
                    st.success(f"✅ Loaded {ftype} file!")
                except Exception as e:
                    st.error(f"❌ Error: {e}")
    elif uploaded and not st.session_state.api_key_set:
        st.warning("⚠️ Enter your Gemini API key first")
    st.markdown("---")

    # Usage help. The text stays at column 0 inside the literal so Markdown
    # does not treat it as an indented code block.
    st.markdown("""
**How to use:**
1. Paste your Gemini API key above
2. Upload CSV, Excel, or JSON file
3. Explore the Dashboard tab
4. Ask questions in Chat tab
5. Generate visuals in Charts tab
---
**Get free Gemini API key:**
[aistudio.google.com](https://aistudio.google.com/app/apikey)
""")
# ─── Main Content ─────────────────────────────────────────────────────────────
# Page header: app title followed by a one-line tagline. Triple-quoted
# literals are used because each string spans a line break, which a
# single-quoted literal cannot contain.
st.markdown("""
🧠 Data Mind Agent
""", unsafe_allow_html=True)
st.markdown("""AI-powered data analysis using LangChain + Gemini · Upload any data file and start exploring
""", unsafe_allow_html=True)
if st.session_state.df is None:
    # Landing state: three feature blurbs and a pointer to the sidebar.
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("""
📂
CSV, Excel, JSON
Upload any tabular data file — we handle the parsing automatically
""", unsafe_allow_html=True)
    with col2:
        st.markdown("""
💬
Natural Language Q&A
Ask anything about your data in plain English — no SQL needed
""", unsafe_allow_html=True)
    with col3:
        st.markdown("""
📊
Smart Visualizations
AI picks the right chart for your question automatically
""", unsafe_allow_html=True)
    st.markdown("\n", unsafe_allow_html=True)
    st.info("👈 Enter your Gemini API key and upload a data file in the sidebar to get started!")
    # Nothing below applies without a dataset: end this run here so the
    # dashboard code can live at top level instead of inside an else branch.
    st.stop()

# A dataset is loaded from here on.
df = st.session_state.df
profile = st.session_state.profile
llm = st.session_state.llm
# ── Tabs ─────────────────────────────────────────────────────────────────
tab1, tab2, tab3, tab4 = st.tabs(["📊 Dashboard", "💬 Chat", "🎨 Charts", "🔍 Raw Data"])
# ════════════════════════════════════════════════════════════════
# TAB 1 — Dashboard
# ════════════════════════════════════════════════════════════════
with tab1:
    rows, cols = profile["shape"]
    nulls = sum(profile["null_counts"].values())
    num_c = len(profile["numeric_columns"])
    cat_c = len(profile["categorical_columns"])

    # Headline metrics. The cards were previously empty f-strings, so none
    # of the counts computed above reached the page.
    c1, c2, c3, c4 = st.columns(4)
    c1.metric("Rows", f"{rows:,}")
    c2.metric("Columns", f"{cols:,}")
    c3.metric("Missing Values", f"{nulls:,}")
    c4.metric("Numeric / Categorical", f"{num_c} / {cat_c}")
    st.markdown("\n", unsafe_allow_html=True)

    # Column overview
    st.markdown("#### 📋 Column Overview")
    col_info = pd.DataFrame({
        "Column": df.columns,
        "Type": df.dtypes.astype(str).values,
        "Non-Null": df.notnull().sum().values,
        "Null %": (df.isnull().mean() * 100).round(1).values,
        "Unique": df.nunique().values,
    })
    st.dataframe(col_info, use_container_width=True, hide_index=True)

    # Auto charts: the first two side by side, an optional third full width.
    st.markdown("#### 🤖 Auto-Generated Insights")
    suggested = auto_suggest_charts(profile)[:3]
    if suggested:
        # Guarded because st.columns() rejects a column count of zero.
        chart_cols = st.columns(min(len(suggested), 2))
        for slot, ctype in zip(chart_cols, suggested[:2]):
            with slot:
                try:
                    fig = make_plotly_chart(ctype, df, profile)
                    st.plotly_chart(fig, use_container_width=True)
                except Exception as e:
                    st.warning(f"Could not render {ctype}: {e}")
        if len(suggested) > 2:
            try:
                fig = make_plotly_chart(suggested[2], df, profile)
                st.plotly_chart(fig, use_container_width=True)
            except Exception as e:
                # Report the failure the same way as the first two charts.
                st.warning(f"Could not render {suggested[2]}: {e}")
    else:
        st.info("No automatic charts are available for this dataset.")

    # AI summary
    st.markdown("#### 🧠 AI Dataset Summary")
    if st.button("✨ Generate AI Summary"):
        with st.spinner("🤖 Agent is generating full report..."):
            set_dataframe(df, profile)
            result = run_agent(
                "Give me a full insight report on this dataset with key patterns, anomalies, and actionable recommendations.",
                st.session_state.agent_executor, []
            )
        # Model output is rendered as plain Markdown. It is untrusted text,
        # so raw HTML is not enabled for it.
        st.markdown(f"{result['output']}\n")
        if result["steps"]:
            with st.expander(f"🔍 Agent used {len(result['steps'])} tool(s)"):
                for step_no, (action, res) in enumerate(result["steps"], start=1):
                    st.markdown(f"**Step {step_no}: `{action.tool}`**")
                    res_text = str(res)
                    # Add the ellipsis only when the text was actually cut.
                    st.code(res_text[:300] + ("..." if len(res_text) > 300 else ""), language="text")
# ════════════════════════════════════════════════════════════════
# TAB 2 — Chat
# ════════════════════════════════════════════════════════════════
def _render_chat_turn(user_text, agent_text, steps):
    """Render one chat exchange: user message, tool trace, agent reply.

    Args:
        user_text: The question as entered by the user.
        agent_text: The agent's final answer.
        steps: Sequence of ``(action, observation)`` pairs from the agent
            run; may be empty or ``None``, in which case no trace is shown.
    """
    # User and model text are untrusted, so they are rendered as plain
    # Markdown without enabling raw HTML.
    st.markdown(f"👤 {user_text}\n")
    if steps:
        with st.expander(f"🔍 Agent used {len(steps)} tool(s) — click to see reasoning"):
            for step_no, (action, observation) in enumerate(steps, start=1):
                st.markdown(f"**Step {step_no}: `{action.tool}`**")
                st.caption(f"Input: {action.tool_input}")
                obs_text = str(observation)
                st.code(obs_text[:500] + ("..." if len(obs_text) > 500 else ""), language="text")
    st.markdown(f"🧠 {agent_text}\n")


with tab2:
    st.markdown("#### 💬 Ask Anything About Your Data")
    st.markdown("*The autonomous agent plans, uses tools, and reasons step-by-step to answer your question.*")

    # Suggested questions
    st.markdown("**Quick questions to try:**")
    suggestions = [
        "Give me a full insight report on this data",
        "Are there any outliers or anomalies?",
        "What correlations exist between numeric columns?",
    ]
    for idx, (slot, suggestion) in enumerate(zip(st.columns(len(suggestions)), suggestions)):
        with slot:
            if st.button(suggestion, key=f"sug_{idx}"):
                st.session_state["prefill_q"] = suggestion

    # Chat history
    for turn in st.session_state.chat_history:
        _render_chat_turn(turn["user"], turn["agent"], turn.get("steps"))

    # Input. A suggestion is written into the widget's own keyed state before
    # the widget is created. Passing it through `value=` on a keyless widget
    # made the widget reset on the next rerun, so a prefilled question came
    # back empty when "Send" was clicked.
    if "prefill_q" in st.session_state:
        st.session_state["chat_question"] = st.session_state.pop("prefill_q")
    question = st.text_input(
        "Ask a question...",
        key="chat_question",
        placeholder="e.g. Which category has the highest profit? Find outliers in sales.",
        label_visibility="collapsed",
    )
    col_send, col_clear = st.columns([1, 5])
    with col_send:
        send = st.button("Send 🚀")
    with col_clear:
        if st.button("Clear Chat"):
            st.session_state.chat_history = []
            st.rerun()

    if send and question.strip():
        # Build LangChain chat history from session
        from langchain_core.messages import HumanMessage as HM, AIMessage
        lc_history = []
        for turn in st.session_state.chat_history:
            lc_history.append(HM(content=turn["user"]))
            lc_history.append(AIMessage(content=turn["agent"]))

        with st.spinner("🤖 Agent is planning and executing tools..."):
            set_dataframe(df, profile)
            result = run_agent(question, st.session_state.agent_executor, lc_history)
            answer = result["output"]
            steps = result["steps"]
            # The chart recommendation is optional: any failure to obtain or
            # parse it just means no chart is drawn for this answer.
            try:
                chart_json = json.loads(recommend_chart.invoke(question))
            except Exception:
                chart_json = None

        st.session_state.chat_history.append({
            "user": question,
            "agent": answer,
            "steps": steps,
        })
        # The new turn is drawn below the input on this run; on later reruns
        # it appears through the history loop above.
        _render_chat_turn(question, answer, steps)

        # Auto chart
        if chart_json:
            try:
                fig = make_plotly_chart(
                    chart_json["chart_type"], df, profile,
                    x_col=chart_json.get("x_col"),
                    y_col=chart_json.get("y_col"),
                )
                st.plotly_chart(fig, use_container_width=True)
            except Exception:
                # Deliberate best-effort: a recommendation that cannot be
                # rendered must not hide the text answer already shown.
                pass
# ════════════════════════════════════════════════════════════════
# TAB 3 — Charts
# ════════════════════════════════════════════════════════════════
with tab3:
    st.markdown("#### 🎨 Custom Chart Builder")
    # Display label -> chart-type identifier passed to make_plotly_chart.
    chart_options = {
        "Correlation Heatmap": "correlation_heatmap",
        "Distribution Plot": "distribution_plots",
        "Box Plots": "box_plots",
        "Bar Chart": "bar_chart",
        "Pie Chart": "pie_chart",
        "Scatter Plot": "scatter",
        "Line Chart": "line",
        "Scatter Matrix": "scatter_matrix",
    }
    # Offered only when the profile lists at least one datetime column.
    if profile["datetime_columns"]:
        chart_options["Time Series"] = "time_series"

    c1, c2, c3 = st.columns(3)
    with c1:
        chart_label = st.selectbox("Chart Type", list(chart_options.keys()))
    with c2:
        all_cols = ["(auto)"] + df.columns.tolist()
        x_col = st.selectbox("X Column", all_cols)
    with c3:
        y_col = st.selectbox("Y Column", all_cols)
    # "(auto)" is passed on as None.
    x_val = None if x_col == "(auto)" else x_col
    y_val = None if y_col == "(auto)" else y_col

    if st.button("🎨 Generate Chart"):
        with st.spinner("Rendering..."):
            try:
                fig = make_plotly_chart(
                    chart_options[chart_label], df, profile,
                    x_col=x_val, y_col=y_val
                )
                st.plotly_chart(fig, use_container_width=True)
            except Exception as e:
                st.error(f"Chart error: {e}")

    st.markdown("---")
    st.markdown("#### 📊 All Auto-Suggested Charts")
    suggested_all = auto_suggest_charts(profile)
    # Lay the suggestions out two per row.
    for start in range(0, len(suggested_all), 2):
        # Named pair_slots so it does not shadow the Dashboard's `cols` count.
        pair_slots = st.columns(2)
        for slot, ctype in zip(pair_slots, suggested_all[start:start + 2]):
            with slot:
                try:
                    fig = make_plotly_chart(ctype, df, profile)
                    st.plotly_chart(fig, use_container_width=True)
                except Exception as e:
                    # Include the cause, matching the Dashboard's warning.
                    st.warning(f"Could not render {ctype}: {e}")
# ════════════════════════════════════════════════════════════════
# TAB 4 — Raw Data
# ════════════════════════════════════════════════════════════════
with tab4:
    st.markdown("#### 🔍 Raw Data Explorer")

    # Search/filter: keep rows where any cell, rendered as text, contains the
    # search string (case-insensitive).
    search = st.text_input("🔎 Filter rows containing...", placeholder="Type to filter...")
    if search:
        # regex=False treats the input as literal text. With the default
        # regex matching, input such as "(" or "[" raises re.error.
        # DataFrame.apply passes one column at a time, hence `col`.
        mask = df.astype(str).apply(
            lambda col: col.str.contains(search, case=False, na=False, regex=False)
        ).any(axis=1)
        display_df = df[mask]
        st.info(f"Showing {len(display_df):,} of {len(df):,} rows matching '{search}'")
    else:
        display_df = df
    st.dataframe(display_df, use_container_width=True, height=500)

    # Download: exports the full dataset, not the filtered view.
    st.download_button(
        "⬇️ Download as CSV",
        data=df.to_csv(index=False),
        file_name="analyzed_data.csv",
        mime="text/csv"
    )