File size: 4,074 Bytes
ed21fd7
216ce00
 
ffa2a6d
 
d909df5
 
ed21fd7
987cfe3
ffa2a6d
ee89673
0b69e41
987cfe3
 
ffa2a6d
987cfe3
 
ffa2a6d
0b69e41
ed21fd7
ffa2a6d
0b69e41
987cfe3
ffa2a6d
987cfe3
ffa2a6d
987cfe3
 
ffa2a6d
 
 
987cfe3
ffa2a6d
 
987cfe3
ee89673
ffa2a6d
 
ee89673
 
 
987cfe3
ee89673
987cfe3
ee89673
 
987cfe3
ffa2a6d
987cfe3
 
 
 
 
ffa2a6d
ee89673
987cfe3
 
ee89673
987cfe3
ee89673
78d0a49
987cfe3
 
ee89673
 
987cfe3
 
 
 
28071c8
987cfe3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ffa2a6d
6dd3d3c
 
 
987cfe3
6dd3d3c
 
 
 
 
ffa2a6d
 
987cfe3
ee89673
987cfe3
 
 
 
 
 
 
 
 
 
 
 
 
 
6dd3d3c
20ed955
987cfe3
 
ffa2a6d
 
6dd3d3c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import random

# LangChain + Gemini
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent


# --- PAGE SETUP ---
# Configure the browser tab and page layout.
st.set_page_config(
    page_title="Agentic Data Analyst",
    # The icon was stored as mis-encoded text; restore the intended chart emoji.
    page_icon="📊",
    layout="wide",
)

# Gemini credentials are read from the environment; None when the variable is unset.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")


def _describe_columns(df: pd.DataFrame) -> str:
    """Return a one-line, template-safe description of the columns of *df*."""
    # Column labels are not guaranteed to be strings, so stringify before joining.
    names = ", ".join(str(label) for label in df.columns)
    # NOTE(review): the prefix is assumed to end up inside LangChain's f-string
    # style prompt template, where single braces mark variables. Doubling them
    # keeps a header such as "price{usd}" literal instead of a missing variable.
    names = names.replace("{", "{{").replace("}", "}}")
    return f"The dataframe 'df' has columns: {names}"


def _build_agent_prefix(df: pd.DataFrame) -> str:
    """Build the instruction prefix handed to the pandas dataframe agent."""
    df_context = _describe_columns(df)
    return f"""
You are a professional Python data analyst running inside a Streamlit + Pandas agent.
The dataframe is named `df`.
{df_context}

🚨 STRICT RULES 🚨

# 1 — CODE QUALITY
- Code must be short, clean, correct.
- Never repeat imports.
- Only allowed imports inside Action Input:
    import matplotlib.pyplot as plt
    import seaborn as sns
- Never import streamlit.
- Never print().
- Never wrap outputs in markdown.

# 2 — PLOTTING RULES
- Before plotting filtered data, check if filtered.empty.
- Always start plots with: plt.figure()
- Always end plots with: st.pyplot(plt.gcf())

# 3 — OUTPUT FORMAT
- If code is required → return ONLY:
    Action: python_repl_ast
    Action Input:
    <python code only>
- If no code is needed → return ONLY:
    Final Answer: <answer>

Follow these rules EXACTLY.
"""


def _expose_streamlit_to_tools(agent) -> None:
    """Make ``st`` resolvable inside the agent's Python tool namespace(s).

    The prompt tells the model to call ``st.pyplot(...)`` but forbids importing
    streamlit, so the name has to be provided by the host application.
    """
    # NOTE(review): relies on the executor exposing `.tools` and the Python
    # REPL tool exposing a `.locals` dict; anything else is skipped untouched.
    for tool in getattr(agent, "tools", ()):
        namespace = getattr(tool, "locals", None)
        if isinstance(namespace, dict):
            namespace.setdefault("st", st)


def main():
    """Render the app: upload a CSV, take a question, run the Gemini agent.

    The page stops early when the API key is missing, returns early while no
    file is uploaded or no question has been submitted, and otherwise shows
    either the agent's final answer or the error it raised.
    """
    st.title("🤖 Agentic Data Analyst (Gemini 2.5 Flash)")
    st.markdown("""
    This agent intelligently analyzes your dataset using an agentic workflow.
    It writes Python code, executes it, and returns insights.
    """)

    if not GEMINI_API_KEY:
        st.error("❌ Missing `GEMINI_API_KEY`. Set it as an environment variable.")
        st.stop()

    # --- CSV UPLOAD ---
    uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
    if not uploaded_file:
        st.info("👆 Upload a CSV file to begin.")
        return

    @st.cache_data
    def load_data(file):
        return pd.read_csv(file)

    try:
        df = load_data(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # A malformed upload is a user error, not a crash: explain and bail out.
        st.error("Could not read the uploaded file as CSV.")
        with st.expander("Show Technical Error"):
            st.code(str(exc))
        return

    with st.expander("📄 Data Overview"):
        st.dataframe(df.head())
        st.info(f"Rows: {df.shape[0]} | Columns: {df.shape[1]}")

    # --- USER QUERY ---
    query = st.text_area(
        "What analysis would you like to perform?",
        placeholder="e.g., Plot Price distribution"
    )

    if not st.button("Run Agent"):
        return

    question = (query or "").strip()
    if not question:
        # Previously a click with an empty box did nothing at all.
        st.warning("Please describe the analysis you want before running the agent.")
        return

    # --- LLM INIT (Gemini 2.5 Flash / Non-streaming fix) ---
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=GEMINI_API_KEY,
        temperature=0,
        max_retries=5,
        streaming=False                     # IMPORTANT: avoids chunking error
    )

    # --- CREATE AGENT ---
    agent = create_pandas_dataframe_agent(
        llm,
        df,
        verbose=True,
        agent_type="zero-shot-react-description",
        allow_dangerous_code=True,
        prefix=_build_agent_prefix(df),
        include_df_in_prompt=False,
        handle_parsing_errors=True,
        agent_executor_kwargs={"handle_parsing_errors": True}
    )
    _expose_streamlit_to_tools(agent)

    # --- EXECUTION ---
    st.subheader("🧠 Reasoning & Execution")

    with st.spinner("Agent analyzing..."):
        try:
            # no callback (Gemini 2.5 streaming not supported)
            # `invoke` replaces the deprecated `run`; the answer is under "output".
            result = agent.invoke({"input": question})
        except Exception as e:
            # UI boundary: the agent executes model-written code, so any error
            # type is possible; show it instead of crashing the page.
            st.error("Agent encountered an error.")
            with st.expander("Show Technical Error"):
                st.code(str(e))
        else:
            st.markdown("---")
            st.subheader("✅ Final Analysis Result")
            st.success(result["output"])
        finally:
            # Figures stay registered with pyplot until closed; release them
            # so they do not accumulate across Streamlit reruns.
            plt.close("all")


# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()