# Page-header residue captured with this listing ("Spaces: Sleeping");
# not part of the program.
| import os | |
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import time | |
| import random | |
| # LangChain + Gemini | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent | |
# --- PAGE SETUP ---
# Configure the page title, icon and wide layout for the app.
st.set_page_config(
    page_title="Agentic Data Analyst",
    # NOTE(review): the icon arrived mis-encoded ("π"); the chart emoji below
    # is a best-guess reconstruction -- confirm the intended glyph.
    page_icon="📊",
    layout="wide",
)

# Gemini credential read from the environment at import time; it is None when
# the variable is unset, and main() reports that case to the user and stops.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
def _build_agent_prefix(columns) -> str:
    """Return the prompt prefix that constrains how the agent writes code.

    Args:
        columns: Column labels of the uploaded dataframe.

    Returns:
        The prefix text with the column names listed in it.
    """
    # The prefix is spliced into a LangChain prompt template, where `{` and `}`
    # delimit variables; double any braces in column labels so they stay
    # literal text. `str()` guards against non-string labels, which
    # `str.join` would reject.
    safe_columns = [
        str(col).replace("{", "{{").replace("}", "}}") for col in columns
    ]
    df_context = f"The dataframe 'df' has columns: {', '.join(safe_columns)}"

    # NOTE(review): the emoji, dash and arrow glyphs below were reconstructed
    # from mis-encoded text -- confirm they match the intended characters.
    return f"""
You are a professional Python data analyst running inside a Streamlit + Pandas agent.
The dataframe is named `df`.
{df_context}
🚨 STRICT RULES 🚨
# 1 — CODE QUALITY
- Code must be short, clean, correct.
- Never repeat imports.
- Only allowed imports inside Action Input:
import matplotlib.pyplot as plt
import seaborn as sns
- Never import streamlit (`st` is already available).
- Never print().
- Never wrap outputs in markdown.
# 2 — PLOTTING RULES
- Before plotting filtered data, check if filtered.empty.
- Always start plots with: plt.figure()
- Always end plots with: st.pyplot(plt.gcf())
# 3 — OUTPUT FORMAT
- If code is required → return ONLY:
Action: python_repl_ast
Action Input:
<python code only>
- If no code is needed → return ONLY:
Final Answer: <answer>
Follow these rules EXACTLY.
"""


def _build_agent(df: pd.DataFrame):
    """Create the Gemini-backed pandas agent for ``df``.

    Args:
        df: The dataframe the agent will analyze (exposed to it as ``df``).

    Returns:
        The agent executor returned by ``create_pandas_dataframe_agent``.
    """
    # --- LLM INIT (Gemini 2.5 Flash / Non-streaming fix) ---
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=GEMINI_API_KEY,
        temperature=0,
        max_retries=5,
        streaming=False,  # IMPORTANT: avoids chunking error
    )

    # --- CREATE AGENT ---
    # SECURITY: allow_dangerous_code=True lets the agent execute model-written
    # Python in this process; only expose the app to trusted users and data.
    # handle_parsing_errors is an executor option, so it is passed only via
    # agent_executor_kwargs (as a bare keyword it is reported as unsupported).
    agent = create_pandas_dataframe_agent(
        llm,
        df,
        verbose=True,
        agent_type="zero-shot-react-description",
        allow_dangerous_code=True,
        prefix=_build_agent_prefix(df.columns),
        include_df_in_prompt=False,
        agent_executor_kwargs={"handle_parsing_errors": True},
    )

    # The prompt tells the model to call `st.pyplot(...)` without importing
    # streamlit, so `st` must already exist in the REPL tool's namespace;
    # otherwise every plotting step fails with NameError.
    for tool in getattr(agent, "tools", ()):
        namespace = getattr(tool, "locals", None)
        if isinstance(namespace, dict):
            namespace.setdefault("st", st)
    return agent


def main() -> None:
    """Render the app: upload a CSV, take a question, run the agent on it.

    Stops early with an on-page message when the API key is missing, no file
    has been uploaded, the CSV cannot be parsed, or the question is empty.
    """
    # NOTE(review): emoji in the UI strings below were reconstructed from
    # mis-encoded text -- confirm the intended glyphs.
    st.title("🤖 Agentic Data Analyst (Gemini 2.5 Flash)")
    st.markdown(
        "This agent intelligently analyzes your dataset using an agentic workflow.\n"
        "It writes Python code, executes it, and returns insights."
    )

    if not GEMINI_API_KEY:
        st.error("❌ Missing `GEMINI_API_KEY`. Set it as an environment variable.")
        st.stop()

    # --- CSV UPLOAD ---
    uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
    if not uploaded_file:
        st.info("👆 Upload a CSV file to begin.")
        return

    try:
        df = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        # An empty or malformed upload is a user error, not a crash.
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()

    with st.expander("📊 Data Overview"):
        st.dataframe(df.head())
        st.info(f"Rows: {df.shape[0]} | Columns: {df.shape[1]}")

    # --- USER QUERY ---
    query = st.text_area(
        "What analysis would you like to perform?",
        placeholder="e.g., Plot Price distribution",
    )
    if not st.button("Run Agent"):
        return
    if not query or not query.strip():
        # Tell the user why nothing ran instead of silently ignoring the click.
        st.warning("Enter a question for the agent before running.")
        return

    agent = _build_agent(df)

    # --- EXECUTION ---
    st.subheader("🧠 Reasoning & Execution")
    with st.spinner("Agent analyzing..."):
        try:
            # no callback (Gemini 2.5 streaming not supported)
            # `invoke` replaces the deprecated `run`; the answer is under "output".
            response = agent.invoke({"input": query})["output"]
        except Exception as exc:  # UI boundary: show any agent failure on-page
            st.error("Agent encountered an error.")
            with st.expander("Show Technical Error"):
                st.code(str(exc))
        else:
            st.markdown("---")
            st.subheader("✅ Final Analysis Result")
            st.success(response)
# Script entry point: build the UI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()