"""
app.py
======
Streamlit UI — Data Analyst Agent (LangChain + Gemini)

Run: streamlit run app.py

Layout: a sidebar (API key + file upload) and, once a file is loaded, four
tabs: Dashboard, Chat, Charts and Raw Data. All state that must survive a
Streamlit rerun lives in ``st.session_state``.
"""

import html
import io
import json
import os

import pandas as pd
import plotly.express as px
import streamlit as st

from core_agent import (
    get_llm, load_file, profile_dataframe, profile_to_text,
    set_dataframe, build_agent, run_agent,
    auto_suggest_charts, make_plotly_chart, recommend_chart
)


# ─── Helpers ──────────────────────────────────────────────────────────────────
# NOTE(review): every st.markdown(..., unsafe_allow_html=True) call in this file
# carries only text separated by blank lines, and the CSS block is a single
# space. It looks like the HTML wrapper markup / stylesheet was lost from this
# copy — restore it from the original if available. The text is kept as found.

def _stack(*parts):
    """Join *parts* with blank lines and pad both ends with a blank line."""
    return "\n\n" + "\n\n".join(str(p) for p in parts) + "\n\n"


def _safe(text):
    """Escape &, < and > so untrusted text cannot inject markup.

    Needed because the surrounding st.markdown calls pass
    unsafe_allow_html=True, which would otherwise render raw tags coming from
    the user's question or from the agent's answer.
    """
    return html.escape(str(text), quote=False)


def _render_steps(steps, *, limit, show_input=True):
    """Render agent reasoning steps as a tool name plus (truncated) output.

    steps:      iterable of (action, observation) pairs as returned by
                run_agent()["steps"]; action exposes .tool and .tool_input.
    limit:      maximum number of characters of the observation to show.
    show_input: also show the tool input as a caption.
    """
    for idx, (action, observation) in enumerate(steps, start=1):
        st.markdown(f"**Step {idx}: `{action.tool}`**")
        if show_input:
            st.caption(f"Input: {action.tool_input}")
        shown = str(observation)
        if len(shown) > limit:
            # Only mark the output as truncated when it really was.
            shown = shown[:limit] + "..."
        st.code(shown, language="text")


# ─── Page Config ──────────────────────────────────────────────────────────────
st.set_page_config(
    page_title="DataMind Agent",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ─── Custom CSS ───────────────────────────────────────────────────────────────
st.markdown(""" """, unsafe_allow_html=True)

# ─── Session State ────────────────────────────────────────────────────────────
# Seed each key once; later reruns must not overwrite existing values.
for key, default in {
    "df": None,
    "profile": None,
    "file_type": None,
    "chat_history": [],
    "llm": None,
    "agent_executor": None,
    "api_key_set": False,
}.items():
    if key not in st.session_state:
        st.session_state[key] = default

# ─── Sidebar ──────────────────────────────────────────────────────────────────
with st.sidebar:
    st.markdown("### 🧠 DataMind Agent")
    st.markdown("---")

    # API Key
    st.markdown("**🔑 Gemini API Key**")
    api_key = st.text_input(
        "Enter your key",
        type="password",
        placeholder="AIza...",
        help="Get free key at aistudio.google.com",
        label_visibility="collapsed",
    )

    if api_key:
        # Rebuild the LLM/agent only when no key is active yet or the key changed.
        key_changed = st.session_state.get("_last_key") != api_key
        if not st.session_state.api_key_set or key_changed:
            try:
                st.session_state.llm = get_llm(api_key)
                st.session_state.agent_executor = build_agent(st.session_state.llm)
                st.session_state.api_key_set = True
                st.session_state["_last_key"] = api_key
                st.success("✅ Connected to Gemini!")
            except Exception as e:
                st.error(f"❌ Invalid key: {e}")

    st.markdown("---")

    # File Upload
    st.markdown("**📁 Upload Data File**")
    uploaded = st.file_uploader(
        "Upload",
        type=["csv", "xlsx", "xls", "json"],
        label_visibility="collapsed",
    )

    if uploaded and st.session_state.api_key_set:
        # The uploader keeps returning the same file on every rerun. Reload
        # (and reset the chat) only when a different file arrives; otherwise
        # every button click would re-parse the file and wipe chat_history.
        upload_sig = (uploaded.name, uploaded.size)
        if st.session_state.get("_loaded_sig") != upload_sig:
            with st.spinner("📊 Analyzing your data..."):
                try:
                    df, ftype = load_file(uploaded)
                    profile = profile_dataframe(df)
                    st.session_state.df = df
                    st.session_state.file_type = ftype
                    st.session_state.profile = profile
                    st.session_state.chat_history = []
                    set_dataframe(df, profile)
                    st.session_state["_loaded_sig"] = upload_sig
                except Exception as e:
                    st.error(f"❌ Error: {e}")
        if st.session_state.get("_loaded_sig") == upload_sig:
            st.success(f"✅ Loaded {st.session_state.file_type} file!")
    elif uploaded and not st.session_state.api_key_set:
        st.warning("⚠️ Enter your Gemini API key first")

    st.markdown("---")
    st.markdown("""
**How to use:**
1. Paste your Gemini API key above
2. Upload CSV, Excel, or JSON file
3. Explore the Dashboard tab
4. Ask questions in Chat tab
5. Generate visuals in Charts tab

---
**Get free Gemini API key:**
[aistudio.google.com](https://aistudio.google.com/app/apikey)
""")

# ─── Main Content ─────────────────────────────────────────────────────────────
st.markdown(_stack("🧠 Data Mind Agent"), unsafe_allow_html=True)
st.markdown(
    _stack(
        "AI-powered data analysis using LangChain + Gemini · "
        "Upload any data file and start exploring"
    ),
    unsafe_allow_html=True,
)

if st.session_state.df is None:
    # Landing state: three feature cards and a pointer to the sidebar.
    landing_cards = [
        ("📂", "CSV, Excel, JSON",
         "Upload any tabular data file — we handle the parsing automatically"),
        ("💬", "Natural Language Q&A",
         "Ask anything about your data in plain English — no SQL needed"),
        ("📊", "Smart Visualizations",
         "AI picks the right chart for your question automatically"),
    ]
    for column, card in zip(st.columns(3), landing_cards):
        with column:
            st.markdown(_stack(*card), unsafe_allow_html=True)

    st.markdown("\n", unsafe_allow_html=True)
    st.info("👈 Enter your Gemini API key and upload a data file in the sidebar to get started!")

else:
    df = st.session_state.df
    profile = st.session_state.profile

    # ── Tabs ─────────────────────────────────────────────────────────────────
    tab1, tab2, tab3, tab4 = st.tabs(["📊 Dashboard", "💬 Chat", "🎨 Charts", "🔍 Raw Data"])

    # ════════════════════════════════════════════════════════════════
    # TAB 1 — Dashboard
    # ════════════════════════════════════════════════════════════════
    with tab1:
        rows, cols = profile["shape"]
        nulls = sum(profile["null_counts"].values())
        num_c = len(profile["numeric_columns"])

        c1, c2, c3, c4 = st.columns(4)
        c1.markdown(_stack(f"{rows:,}", "Rows"), unsafe_allow_html=True)
        c2.markdown(_stack(cols, "Columns"), unsafe_allow_html=True)
        c3.markdown(_stack(num_c, "Numeric Cols"), unsafe_allow_html=True)
        c4.markdown(_stack(nulls, "Missing Values"), unsafe_allow_html=True)

        st.markdown("\n", unsafe_allow_html=True)

        # Column overview
        st.markdown("#### 📋 Column Overview")
        col_info = pd.DataFrame({
            "Column": df.columns,
            "Type": df.dtypes.astype(str).values,
            "Non-Null": df.notnull().sum().values,
            "Null %": (df.isnull().mean() * 100).round(1).values,
            "Unique": df.nunique().values,
        })
        st.dataframe(col_info, use_container_width=True, hide_index=True)

        # Auto charts: first two side by side, an optional third at full width.
        st.markdown("#### 🤖 Auto-Generated Insights")
        suggested = auto_suggest_charts(profile)[:3]
        if suggested:
            # st.columns() rejects 0, so it is only called with 1 or 2 here.
            chart_cols = st.columns(min(len(suggested), 2))
            for slot, ctype in zip(chart_cols, suggested[:2]):
                with slot:
                    try:
                        fig = make_plotly_chart(ctype, df, profile)
                        st.plotly_chart(fig, use_container_width=True)
                    except Exception as e:
                        st.warning(f"Could not render {ctype}: {e}")
            if len(suggested) > 2:
                try:
                    fig = make_plotly_chart(suggested[2], df, profile)
                    st.plotly_chart(fig, use_container_width=True)
                except Exception as e:
                    st.warning(f"Could not render {suggested[2]}: {e}")
        else:
            st.info("No automatic charts are available for this dataset.")

        # AI summary
        st.markdown("#### 🧠 AI Dataset Summary")
        if st.button("✨ Generate AI Summary"):
            with st.spinner("🤖 Agent is generating full report..."):
                set_dataframe(df, profile)
                result = run_agent(
                    "Give me a full insight report on this dataset with key patterns, anomalies, and actionable recommendations.",
                    st.session_state.agent_executor,
                    []
                )
            # Agent output is derived from uploaded data; escape before it is
            # rendered with unsafe_allow_html=True.
            st.markdown(_stack(_safe(result["output"])), unsafe_allow_html=True)
            if result["steps"]:
                with st.expander(f"🔍 Agent used {len(result['steps'])} tool(s)"):
                    _render_steps(result["steps"], limit=300, show_input=False)

    # ════════════════════════════════════════════════════════════════
    # TAB 2 — Chat
    # ════════════════════════════════════════════════════════════════
    with tab2:
        st.markdown("#### 💬 Ask Anything About Your Data")
        st.markdown("*The autonomous agent plans, uses tools, and reasons step-by-step to answer your question.*")

        # Suggested questions: clicking one pre-fills the input box below.
        st.markdown("**Quick questions to try:**")
        suggestions = [
            "Give me a full insight report on this data",
            "Are there any outliers or anomalies?",
            "What correlations exist between numeric columns?",
        ]
        q_cols = st.columns(3)
        for i, s in enumerate(suggestions):
            with q_cols[i]:
                if st.button(s, key=f"sug_{i}"):
                    st.session_state["prefill_q"] = s

        # Chat history
        for turn in st.session_state.chat_history:
            st.markdown(_stack(f'👤 {_safe(turn["user"])}'), unsafe_allow_html=True)
            # Show agent reasoning steps
            if turn.get("steps"):
                with st.expander(f"🔍 Agent used {len(turn['steps'])} tool(s) — click to see reasoning"):
                    _render_steps(turn["steps"], limit=500)
            st.markdown(_stack(f'🧠 {_safe(turn["agent"])}'), unsafe_allow_html=True)

        # Input (pop so the prefill is consumed by exactly one rerun)
        prefill = st.session_state.pop("prefill_q", "")
        question = st.text_input(
            "Ask a question...",
            value=prefill,
            placeholder="e.g. Which category has the highest profit? Find outliers in sales.",
            label_visibility="collapsed",
        )

        col_send, col_clear = st.columns([1, 5])
        with col_send:
            send = st.button("Send 🚀")
        with col_clear:
            if st.button("Clear Chat"):
                st.session_state.chat_history = []
                st.rerun()

        if send and question.strip():
            # Build LangChain chat history from session
            from langchain_core.messages import HumanMessage as HM, AIMessage

            lc_history = []
            for turn in st.session_state.chat_history:
                lc_history.extend(
                    (HM(content=turn["user"]), AIMessage(content=turn["agent"]))
                )

            with st.spinner("🤖 Agent is planning and executing tools..."):
                set_dataframe(df, profile)
                result = run_agent(question, st.session_state.agent_executor, lc_history)
                answer = result["output"]
                steps = result["steps"]

                # Chart recommendation is a best-effort extra: any failure
                # (tool error, non-JSON reply) just means no chart is shown.
                try:
                    chart_json = json.loads(recommend_chart.invoke(question))
                except Exception:
                    chart_json = None

            st.session_state.chat_history.append({
                "user": question,
                "agent": answer,
                "steps": steps,
            })

            # The history loop above already ran, so render the new turn here.
            st.markdown(_stack(f"👤 {_safe(question)}"), unsafe_allow_html=True)

            # Show reasoning steps
            if steps:
                with st.expander(f"🔍 Agent used {len(steps)} tool(s) — click to see reasoning"):
                    _render_steps(steps, limit=500)

            st.markdown(_stack(f"🧠 {_safe(answer)}"), unsafe_allow_html=True)

            # Auto chart
            if chart_json:
                try:
                    fig = make_plotly_chart(
                        chart_json["chart_type"], df, profile,
                        x_col=chart_json.get("x_col"),
                        y_col=chart_json.get("y_col"),
                    )
                    st.plotly_chart(fig, use_container_width=True)
                except Exception as e:
                    # Keep the answer usable, but say why no chart appeared.
                    st.caption(f"Could not render a chart for this answer: {e}")

    # ════════════════════════════════════════════════════════════════
    # TAB 3 — Charts
    # ════════════════════════════════════════════════════════════════
    with tab3:
        st.markdown("#### 🎨 Custom Chart Builder")

        # Display label -> chart type identifier passed to make_plotly_chart.
        chart_options = {
            "Correlation Heatmap": "correlation_heatmap",
            "Distribution Plot": "distribution_plots",
            "Box Plots": "box_plots",
            "Bar Chart": "bar_chart",
            "Pie Chart": "pie_chart",
            "Scatter Plot": "scatter",
            "Line Chart": "line",
            "Scatter Matrix": "scatter_matrix",
        }
        if profile["datetime_columns"]:
            chart_options["Time Series"] = "time_series"

        c1, c2, c3 = st.columns(3)
        with c1:
            chart_label = st.selectbox("Chart Type", list(chart_options.keys()))
        with c2:
            all_cols = ["(auto)"] + df.columns.tolist()
            x_col = st.selectbox("X Column", all_cols)
        with c3:
            y_col = st.selectbox("Y Column", all_cols)

        # "(auto)" means: pass None for that axis.
        x_val = None if x_col == "(auto)" else x_col
        y_val = None if y_col == "(auto)" else y_col

        if st.button("🎨 Generate Chart"):
            with st.spinner("Rendering..."):
                try:
                    fig = make_plotly_chart(
                        chart_options[chart_label], df, profile,
                        x_col=x_val, y_col=y_val
                    )
                    st.plotly_chart(fig, use_container_width=True)
                except Exception as e:
                    st.error(f"Chart error: {e}")

        st.markdown("---")
        st.markdown("#### 📊 All Auto-Suggested Charts")
        suggested_all = auto_suggest_charts(profile)
        # Lay the suggestions out two per row.
        for start in range(0, len(suggested_all), 2):
            pair_cols = st.columns(2)
            for slot, ctype in zip(pair_cols, suggested_all[start:start + 2]):
                with slot:
                    try:
                        fig = make_plotly_chart(ctype, df, profile)
                        st.plotly_chart(fig, use_container_width=True)
                    except Exception as e:
                        st.warning(f"Could not render {ctype}: {e}")

    # ════════════════════════════════════════════════════════════════
    # TAB 4 — Raw Data
    # ════════════════════════════════════════════════════════════════
    with tab4:
        st.markdown("#### 🔍 Raw Data Explorer")

        # Search/filter
        search = st.text_input("🔎 Filter rows containing...", placeholder="Type to filter...")
        if search:
            # regex=False: the box takes plain text, so characters such as "("
            # or "[" must match literally instead of raising a regex error.
            mask = df.astype(str).apply(
                lambda column: column.str.contains(search, case=False, na=False, regex=False)
            ).any(axis=1)
            display_df = df[mask]
            st.info(f"Showing {len(display_df):,} of {len(df):,} rows matching '{search}'")
        else:
            display_df = df

        st.dataframe(display_df, use_container_width=True, height=500)

        # Download (always the full dataset, not the filtered view)
        csv_buf = io.StringIO()
        df.to_csv(csv_buf, index=False)
        st.download_button(
            "⬇️ Download as CSV",
            data=csv_buf.getvalue(),
            file_name="analyzed_data.csv",
            mime="text/csv"
        )