# sanjaystarc's picture
# Update app.py
# 987cfe3 verified
import os
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import random
# LangChain + Gemini
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
# --- PAGE SETUP ---
# Page-level configuration is issued at import time, ahead of every other
# Streamlit call made by this script.
st.set_page_config(
    page_title="Agentic Data Analyst",
    # The icon must be a real emoji; the previous value was a mis-decoded
    # byte sequence of this same character.
    page_icon="📊",
    layout="wide",
)

# Gemini API key taken from the environment; None when the variable is unset
# (main() checks for that and stops with an error message).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
def _build_agent_prefix(df):
    """Return the instruction prefix handed to the pandas agent for *df*.

    The prefix names the dataframe, lists its columns and spells out the
    code-quality, plotting and output-format rules the model must follow.
    """
    # Labels are stringified because a column label is not guaranteed to be a
    # str. Braces are doubled because the agent factory compiles the prefix
    # into a format-style prompt template, where a bare "{name}" coming from a
    # column header would be parsed as a template variable.
    column_list = ", ".join(str(label) for label in df.columns)
    column_list = column_list.replace("{", "{{").replace("}", "}}")
    df_context = f"The dataframe 'df' has columns: {column_list}"

    # NOTE(review): the rules below ask the model to call st.pyplot(...) while
    # forbidding `import streamlit`. Confirm that `st` is actually reachable
    # from the namespace the agent's Python tool executes in.
    return f"""
You are a professional Python data analyst running inside a Streamlit + Pandas agent.
The dataframe is named `df`.
{df_context}
🚨 STRICT RULES 🚨
# 1 — CODE QUALITY
- Code must be short, clean, correct.
- Never repeat imports.
- Only allowed imports inside Action Input:
import matplotlib.pyplot as plt
import seaborn as sns
- Never import streamlit.
- Never print().
- Never wrap outputs in markdown.
# 2 — PLOTTING RULES
- Before plotting filtered data, check if filtered.empty.
- Always start plots with: plt.figure()
- Always end plots with: st.pyplot(plt.gcf())
# 3 — OUTPUT FORMAT
- If code is required → return ONLY:
Action: python_repl_ast
Action Input:
<python code only>
- If no code is needed → return ONLY:
Final Answer: <answer>
Follow these rules EXACTLY.
"""


def main():
    """Render the app: upload a CSV, take a question, run the Gemini agent.

    Flow:
      1. Stop with an error if ``GEMINI_API_KEY`` is not configured.
      2. Let the user upload a CSV and show a short overview of it.
      3. On "Run Agent", build a pandas dataframe agent backed by Gemini and
         display either its final answer or the technical error it raised.
    """
    st.title("🤖 Agentic Data Analyst (Gemini 2.5 Flash)")
    st.markdown("""
This agent intelligently analyzes your dataset using an agentic workflow.
It writes Python code, executes it, and returns insights.
""")

    if not GEMINI_API_KEY:
        st.error("❌ Missing `GEMINI_API_KEY`. Set it as an environment variable.")
        st.stop()

    # --- CSV UPLOAD ---
    uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
    if not uploaded_file:
        st.info("👆 Upload a CSV file to begin.")
        return

    @st.cache_data
    def load_data(file):
        return pd.read_csv(file)

    # A malformed, empty or non-UTF-8 upload should produce a readable
    # message instead of an unhandled traceback.
    try:
        df = load_data(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        return

    with st.expander("📄 Data Overview"):
        st.dataframe(df.head())
        st.info(f"Rows: {df.shape[0]} | Columns: {df.shape[1]}")

    # --- USER QUERY ---
    query = st.text_area(
        "What analysis would you like to perform?",
        placeholder="e.g., Plot Price distribution"
    )
    if not st.button("Run Agent"):
        return
    if not query or not query.strip():
        # Previously a click with no question did nothing, with no feedback.
        st.warning("Please enter a question before running the agent.")
        return

    # --- LLM INIT (Gemini 2.5 Flash / Non-streaming fix) ---
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=GEMINI_API_KEY,
        temperature=0,
        max_retries=5,
        streaming=False  # IMPORTANT: avoids chunking error
    )

    # --- CREATE AGENT ---
    agent = create_pandas_dataframe_agent(
        llm,
        df,
        verbose=True,
        agent_type="zero-shot-react-description",
        allow_dangerous_code=True,
        prefix=_build_agent_prefix(df),
        include_df_in_prompt=False,
        handle_parsing_errors=True,
        agent_executor_kwargs={"handle_parsing_errors": True}
    )

    # --- EXECUTION ---
    st.subheader("🧠 Reasoning & Execution")
    with st.spinner("Agent analyzing..."):
        try:
            # no callback (Gemini 2.5 streaming not supported)
            # invoke() is the supported replacement for the deprecated run();
            # the executor's final answer is stored under the "output" key.
            response = agent.invoke({"input": query})["output"]
        except Exception as e:
            # UI boundary: show the failure instead of crashing the page.
            st.error("Agent encountered an error.")
            with st.expander("Show Technical Error"):
                st.code(str(e))
        else:
            st.markdown("---")
            st.subheader("✅ Final Analysis Result")
            st.success(response)
        finally:
            # Figures opened by agent-generated code stay registered in
            # pyplot's global state; close them so they do not pile up
            # across reruns.
            plt.close("all")
# Script entry point: build the UI only when executed as the main module.
if __name__ == "__main__":
    main()