import os
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import random
# LangChain + Gemini
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
# --- PAGE SETUP ---
# Must run before any other Streamlit command in the script.
st.set_page_config(
    page_title="Agentic Data Analyst",
    # The original icon was a mis-decoded emoji (a lone Greek "π"), which is
    # not a valid emoji/shortcode/image path. NOTE(review): the exact original
    # emoji is not recoverable from the garbled text; confirm the choice.
    page_icon="📊",
    layout="wide",
)

# Gemini API key, read once from the process environment; None when unset.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# Instructions prepended to the agent prompt. ``{df_context}`` is substituted
# per run with a one-line listing of the uploaded dataframe's columns.
# NOTE(review): the rules tell the agent to call ``st.pyplot(...)`` while also
# forbidding ``import streamlit``; confirm that ``st`` is actually reachable
# from the REPL tool's namespace, otherwise generated plots will not render.
_AGENT_PREFIX_TEMPLATE = """
You are a professional Python data analyst running inside a Streamlit + Pandas agent.
The dataframe is named `df`.
{df_context}
🚨 STRICT RULES 🚨
# 1 — CODE QUALITY
- Code must be short, clean, correct.
- Never repeat imports.
- Only allowed imports inside Action Input:
import matplotlib.pyplot as plt
import seaborn as sns
- Never import streamlit.
- Never print().
- Never wrap outputs in markdown.
# 2 — PLOTTING RULES
- Before plotting filtered data, check if filtered.empty.
- Always start plots with: plt.figure()
- Always end plots with: st.pyplot(plt.gcf())
# 3 — OUTPUT FORMAT
- If code is required → return ONLY:
Action: python_repl_ast
Action Input:
<python code only>
- If no code is needed → return ONLY:
Final Answer: <answer>
Follow these rules EXACTLY.
"""


@st.cache_data
def _load_csv(file):
    """Parse an uploaded CSV file into a DataFrame.

    Wrapped in ``st.cache_data`` so the parsed result can be reused instead
    of re-reading the upload every time the script reruns.
    """
    return pd.read_csv(file)


def _describe_columns(df):
    """Return the one-line column summary that is injected into the prompt.

    Column labels are passed through ``str`` because ``str.join`` raises
    ``TypeError`` on non-string labels. Braces are doubled because the prefix
    is understood to be spliced into an f-string style prompt template by
    LangChain, where a bare ``{name}`` would be read as a template variable
    (NOTE(review): confirm against the installed LangChain version).
    """
    names = ", ".join(str(label) for label in df.columns)
    names = names.replace("{", "{{").replace("}", "}}")
    return f"The dataframe 'df' has columns: {names}"


def _run_agent(df, query):
    """Build the Gemini-backed pandas agent and return its answer to *query*.

    Args:
        df: DataFrame the agent is allowed to inspect and manipulate.
        query: Natural-language analysis request typed by the user.

    Returns:
        The value stored under the ``"output"`` key of the agent's result.
    """
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=GEMINI_API_KEY,
        temperature=0,
        max_retries=5,
        streaming=False,  # IMPORTANT: avoids chunking error
    )
    custom_prefix = _AGENT_PREFIX_TEMPLATE.format(
        df_context=_describe_columns(df)
    )
    # SECURITY: allow_dangerous_code=True lets the agent execute model-written
    # Python in this process. Only deploy where that is acceptable.
    agent = create_pandas_dataframe_agent(
        llm,
        df,
        verbose=True,
        agent_type="zero-shot-react-description",
        allow_dangerous_code=True,
        prefix=custom_prefix,
        include_df_in_prompt=False,
        handle_parsing_errors=True,
        agent_executor_kwargs={"handle_parsing_errors": True},
    )
    # No callback handler is attached (Gemini 2.5 streaming not supported).
    # ``invoke`` replaces the deprecated ``run`` entry point.
    return agent.invoke({"input": query})["output"]


def main():
    """Render the Streamlit page: upload a CSV, ask a question, show the answer.

    Stops early (with an on-page message) when the API key is missing, no
    file has been uploaded, the file cannot be parsed as CSV, or the query
    is blank. Agent failures are reported on the page instead of propagating.
    """
    st.title("🤖 Agentic Data Analyst (Gemini 2.5 Flash)")
    st.markdown(
        "\n"
        "This agent intelligently analyzes your dataset using an agentic workflow.\n"
        "It writes Python code, executes it, and returns insights.\n"
    )

    if not GEMINI_API_KEY:
        st.error("❌ Missing `GEMINI_API_KEY`. Set it as an environment variable.")
        st.stop()

    # --- CSV UPLOAD ---
    uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
    if not uploaded_file:
        st.info("👆 Upload a CSV file to begin.")
        return

    try:
        df = _load_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        # A malformed upload is a user error, not a crash: explain and stop.
        st.error("Could not read the uploaded file as CSV.")
        with st.expander("Show Technical Error"):
            st.code(str(exc))
        return

    with st.expander("📊 Data Overview"):
        st.dataframe(df.head())
        st.info(f"Rows: {df.shape[0]} | Columns: {df.shape[1]}")

    # --- USER QUERY ---
    query = st.text_area(
        "What analysis would you like to perform?",
        placeholder="e.g., Plot Price distribution",
    )
    if not st.button("Run Agent"):
        return
    if not query or not query.strip():
        # Previously a click with an empty box did nothing, silently.
        st.warning("Please enter a question before running the agent.")
        return

    # --- EXECUTION ---
    st.subheader("🧠 Reasoning & Execution")
    with st.spinner("Agent analyzing..."):
        try:
            response = _run_agent(df, query)
        except Exception as e:
            # Top-level UI boundary: surface any agent/LLM failure on the page.
            st.error("Agent encountered an error.")
            with st.expander("Show Technical Error"):
                st.code(str(e))
        else:
            st.markdown("---")
            st.subheader("✅ Final Analysis Result")
            st.success(response)
# Run the app only when this file is executed as the script entry point.
if __name__ == "__main__":
    main()