data-analysis-agent-2.0

Sleeping

App Files Community

sanjaystarc committed on Dec 24, 2025

Commit

78d0a49

verified ·

1 Parent(s): 3a5adc2

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -22

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
-# Using stable, modern imports to avoid version conflicts
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
 from langchain_community.callbacks.streamlit import StreamlitCallbackHandler
@@ -22,20 +22,18 @@ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 def main():
     st.title("🤖 Agentic Data Analyst")
     st.markdown("""
-    This agent follows an **agentic workflow**: it reasons about your question, writes Python code,
-    observes the output, and self-corrects if it encounters errors.
     """)
-    # Check for API Key
     if not GEMINI_API_KEY:
-        st.error("❌ Missing `GEMINI_API_KEY`. Please set it as an environment variable or in Streamlit Secrets.")
         st.stop()
     # --- 2. DATA LOADING ---
     uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
     if uploaded_file:
-        # Load and cache for performance
         @st.cache_data
         def load_data(file):
             return pd.read_csv(file)
@@ -47,37 +45,47 @@ def main():
             st.info(f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns.")
         # --- 3. AGENT CONFIGURATION ---
-        query = st.text_area("What analysis would you like to perform?", placeholder="e.g., 'Analyze the relationship between x and y and show a scatter plot.'")
         if st.button("Run Agent") and query:
-            # Initialize the LLM (using Gemini 2.5 Flash for speed/reasoning balance)
             llm = ChatGoogleGenerativeAI(
-                model="gemini-2.5-flash-preview-09-2025",
                 google_api_key=GEMINI_API_KEY,
-                temperature=0, # Crucial for deterministic data analysis
             )
-            # Create the Pandas Agent
-            # Using the string identifier 'zero-shot-react-description' avoids import errors
             agent = create_pandas_dataframe_agent(
                 llm,
                 df,
                 verbose=True,
                 agent_type="zero-shot-react-description",
-                allow_dangerous_code=True, # Required to execute Python on the dataframe
-                handle_parsing_errors=True
             )
-            # --- 4. EXECUTION WITH VISUAL CALLBACKS ---
             st.subheader("🧠 Reasoning & Execution")
-            # This container allows the user to see the agent's step-by-step thinking
             thought_container = st.container()
             st_callback = StreamlitCallbackHandler(thought_container)
-            with st.spinner("Agent is working..."):
                 try:
-                    # Execute the loop
                     response = agent.run(query, callbacks=[st_callback])
                     st.markdown("---")
@@ -85,10 +93,12 @@ def main():
                     st.success(response)
                 except Exception as e:
-                    st.error(f"Agent failed to complete the task: {e}")
-                    st.info("Try rephrasing your query or checking if the column names are easy for the AI to understand.")
     else:
-        st.info("👆 Upload a CSV to begin the agentic session.")
 if __name__ == "__main__":
     main()

 import matplotlib.pyplot as plt
 import seaborn as sns
+# Updated LangChain Imports
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
 from langchain_community.callbacks.streamlit import StreamlitCallbackHandler
 def main():
     st.title("🤖 Agentic Data Analyst")
     st.markdown("""
+    This agent follows an **agentic workflow**: it reasons, writes code,
+    observes results, and self-corrects.
     """)
     if not GEMINI_API_KEY:
+        st.error("❌ Missing `GEMINI_API_KEY`. Please set it as an environment variable.")
         st.stop()
     # --- 2. DATA LOADING ---
     uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
     if uploaded_file:
         @st.cache_data
         def load_data(file):
             return pd.read_csv(file)
             st.info(f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns.")
         # --- 3. AGENT CONFIGURATION ---
+        query = st.text_area("What analysis would you like to perform?", placeholder="e.g., 'Plot the distribution of prices.'")
         if st.button("Run Agent") and query:
             llm = ChatGoogleGenerativeAI(
+                model="gemini-2.5-flash",
                 google_api_key=GEMINI_API_KEY,
+                temperature=0,
             )
+            # CUSTOM PROMPT PREFIX: Helps Gemini follow the ReAct format strictly to avoid parsing errors
+            custom_prefix = """
+            You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
+            You should use the tools below to answer the question posed of you.
+            IMPORTANT: Your 'Action Input' must be valid Python code and ONLY Python code.
+            Do not wrap the code in markdown code blocks within the Action Input.
+            """
+            # Create the Pandas Agent with enhanced error handling
             agent = create_pandas_dataframe_agent(
                 llm,
                 df,
                 verbose=True,
                 agent_type="zero-shot-react-description",
+                allow_dangerous_code=True,
+                prefix=custom_prefix,
+                # Pass handle_parsing_errors here AND in agent_executor_kwargs for maximum stability
+                handle_parsing_errors=True,
+                agent_executor_kwargs={
+                    "handle_parsing_errors": True,
+                    "max_iterations": 5
+                }
             )
+            # --- 4. EXECUTION ---
             st.subheader("🧠 Reasoning & Execution")
             thought_container = st.container()
             st_callback = StreamlitCallbackHandler(thought_container)
+            with st.spinner("Agent is analyzing..."):
                 try:
+                    # Run the loop
                     response = agent.run(query, callbacks=[st_callback])
                     st.markdown("---")
                     st.success(response)
                 except Exception as e:
+                    st.error(f"Agent failed to complete the task.")
+                    with st.expander("Show Technical Error"):
+                        st.code(str(e))
+                    st.info("💡 Tip: The agent had trouble formatting its 'Action'. Try asking the question again or rephrasing it.")
     else:
+        st.info("👆 Upload a CSV to begin.")
 if __name__ == "__main__":
     main()