"""Streamlit front end for an agentic data analyst backed by Gemini.

The user uploads a CSV file and types a question; a LangChain pandas
dataframe agent (Gemini 2.5 Flash) writes and runs Python against the
dataframe and the final answer is rendered on the page.
"""

import os
import random  # not used in this script; kept in case other code relies on it
import time  # not used in this script; kept in case other code relies on it

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

# LangChain + Gemini
from langchain_experimental.agents.agent_toolkits import (
    create_pandas_dataframe_agent,
)
from langchain_google_genai import ChatGoogleGenerativeAI

# --- PAGE SETUP ---
st.set_page_config(
    page_title="Agentic Data Analyst",
    page_icon="📊",
    layout="wide",
)

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")


# Declared once at module scope (after the page config call) instead of
# inside main(), so the cached function is not re-created on every rerun.
@st.cache_data
def load_data(file) -> pd.DataFrame:
    """Read the uploaded CSV file into a dataframe (cached by Streamlit)."""
    return pd.read_csv(file)


def _describe_columns(df: pd.DataFrame) -> str:
    """Return a one-line description of the dataframe's columns for the prompt.

    Column labels are converted with ``str`` so that non-string labels
    (for example integers) do not break ``str.join``. Curly braces are doubled
    because the prefix is later embedded in the agent's prompt template.
    NOTE(review): this assumes the agent builds an f-string style prompt
    template from the prefix -- confirm against the installed LangChain version.
    """
    names = ", ".join(str(column) for column in df.columns)
    names = names.replace("{", "{{").replace("}", "}}")
    return f"The dataframe 'df' has columns: {names}"


def _build_prefix(df: pd.DataFrame) -> str:
    """Build the system prefix holding the dataframe context and strict rules."""
    df_context = _describe_columns(df)
    return f"""
You are a professional Python data analyst running inside a Streamlit + Pandas agent.
The dataframe is named `df`.
{df_context}

🚨 STRICT RULES 🚨

# 1 — CODE QUALITY
- Code must be short, clean, correct.
- Never repeat imports.
- Only allowed imports inside Action Input:
    import matplotlib.pyplot as plt
    import seaborn as sns
- Never import streamlit.
- Never print().
- Never wrap outputs in markdown.

# 2 — PLOTTING RULES
- Before plotting filtered data, check if filtered.empty.
- Always start plots with:
    plt.figure()
- Always end plots with:
    st.pyplot(plt.gcf())

# 3 — OUTPUT FORMAT
- If code is required → return ONLY:
    Action: python_repl_ast
    Action Input:
- If no code is needed → return ONLY:
    Final Answer:

Follow these rules EXACTLY.
"""


def _build_agent(df: pd.DataFrame):
    """Create the Gemini-backed pandas dataframe agent for ``df``."""
    # --- LLM INIT (Gemini 2.5 Flash / Non-streaming fix) ---
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=GEMINI_API_KEY,
        temperature=0,
        max_retries=5,
        streaming=False,  # IMPORTANT: avoids chunking error
    )

    # --- CREATE AGENT ---
    # handle_parsing_errors is an executor option, so it is passed once via
    # agent_executor_kwargs rather than also as a loose keyword argument.
    agent = create_pandas_dataframe_agent(
        llm,
        df,
        verbose=True,
        agent_type="zero-shot-react-description",
        allow_dangerous_code=True,
        prefix=_build_prefix(df),
        include_df_in_prompt=False,
        agent_executor_kwargs={"handle_parsing_errors": True},
    )

    # The prompt tells the model to call st.pyplot(...) but never to import
    # streamlit, so `st` must already exist in the namespace the generated
    # code runs in.
    # NOTE(review): presumably the agent's Python tool keeps its namespace in
    # a `locals` dict that only holds `df` -- confirm for the installed
    # langchain_experimental version. The guard makes this a no-op otherwise.
    for tool in getattr(agent, "tools", ()):
        namespace = getattr(tool, "locals", None)
        if isinstance(namespace, dict):
            namespace.setdefault("st", st)

    return agent


def _run_agent(df: pd.DataFrame, query: str) -> None:
    """Run the agent on ``query`` and render either the answer or the error."""
    agent = _build_agent(df)

    # --- EXECUTION ---
    st.subheader("🧠 Reasoning & Execution")
    with st.spinner("Agent analyzing..."):
        try:
            # no callback (Gemini 2.5 streaming not supported)
            # invoke() replaces the deprecated run(); the answer text is
            # returned under the "output" key.
            response = agent.invoke({"input": query})["output"]
            st.markdown("---")
            st.subheader("✅ Final Analysis Result")
            st.success(response)
        except Exception as e:  # top-level UI boundary: report, do not crash
            st.error("Agent encountered an error.")
            with st.expander("Show Technical Error"):
                st.code(str(e))


def main() -> None:
    """Render the page: API key check, CSV upload, query box, agent run."""
    st.title("🤖 Agentic Data Analyst (Gemini 2.5 Flash)")
    st.markdown("""
    This agent intelligently analyzes your dataset using an agentic workflow.
    It writes Python code, executes it, and returns insights.
    """)

    if not GEMINI_API_KEY:
        st.error("❌ Missing `GEMINI_API_KEY`. Set it as an environment variable.")
        st.stop()

    # --- CSV UPLOAD ---
    uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

    if not uploaded_file:
        st.info("👆 Upload a CSV file to begin.")
        return

    df = load_data(uploaded_file)

    with st.expander("📄 Data Overview"):
        st.dataframe(df.head())
        st.info(f"Rows: {df.shape[0]} | Columns: {df.shape[1]}")

    # --- USER QUERY ---
    query = st.text_area(
        "What analysis would you like to perform?",
        placeholder="e.g., Plot Price distribution",
    )

    if st.button("Run Agent") and query:
        _run_agent(df, query)


if __name__ == "__main__":
    main()