Spaces:

LeannJoy
/

DataAnalysisApp

Sleeping

App Files Files Community

LeannJoy committed on Oct 16, 2025

Commit

dfbae8c

verified ·

1 Parent(s): 55d2921

Update app.py

Browse files

Files changed (1) hide show

app.py +114 -80

app.py CHANGED Viewed

@@ -1,91 +1,125 @@
 import streamlit as st
 import pandas as pd
-from utils import perform_eda, query_agent
-import io
-# --- Page Configuration ---
-st.set_page_config(
-    page_title="NL Data Analysis Agent",
-    layout="wide",
-    initial_sidebar_state="expanded"
 )
-def main():
-    """Main function to run the Streamlit application."""
-    st.title("📊 Natural Language Data Analysis Agent")
-    st.caption("Upload a CSV file and ask questions about your data using plain English.")
-    # --- Sidebar for Instructions ---
-    with st.sidebar:
-        st.header("Instructions")
-        st.markdown("""
-        1. **Upload** your CSV file in the main area.
-        2. **Review** the automatic Exploratory Data Analysis (EDA) summary.
-        3. **Ask** your question in the text box. The system uses a LangChain Agent to run Python/Pandas code on your data and provide an answer.
-        **Examples of Queries:**
-        - "What is the average income?"
-        - "Show a histogram of the 'Age' column."
-        - "Calculate the correlation matrix and identify the strongest relationship."
-        - "Perform a bivariate analysis of 'Gender' and 'Purchase_Amount'."
-        """)
-        st.warning("⚠️ **Note on LLM:** This app uses the Gemini API via LangChain. For production use with a truly open-source LLM, you would need to replace the LLM initialization in `utils.py` with a compatible self-hosted or endpoint-based model.")
-    # --- File Uploader ---
-    uploaded_file = st.file_uploader(
-        "Upload a CSV file",
-        type="csv",
-        help="The CSV file should be structured with headers."
-    )
-    if uploaded_file is not None:
-        try:
-            # Read the CSV file into a Pandas DataFrame
-            # We use StringIO to ensure compatibility across environments
-            uploaded_file.seek(0)
-            data = uploaded_file.read()
-            df = pd.read_csv(io.StringIO(data.decode('utf-8')))
-            st.success("CSV file successfully loaded!")
-            # --- Data Preview ---
-            st.subheader("Data Preview")
-            st.dataframe(df.head())
-            # --- Automated EDA ---
-            with st.expander("🔬 Automated Exploratory Data Analysis (EDA)", expanded=False):
-                eda_summary = perform_eda(df)
-                st.markdown(eda_summary)
-            # --- Natural Language Query Interface ---
-            st.subheader("Ask a Question about the Data")
-            # Text input for the user's query
-            user_query = st.text_area(
-                "Enter your data question (e.g., 'Plot the distribution of Age' or 'What is the median salary?'):",
-                placeholder="Ask me anything about your data...",
-                key="user_query_input"
-            )
-            # Button to trigger the agent
-            if st.button("Run Analysis", use_container_width=True, type="primary") and user_query:
-                with st.spinner(f"Analyzing data with LangChain Agent..."):
-                    # Call the agent function from utils.py
-                    agent_response = query_agent(df, user_query)
-                    st.success("Analysis Complete!")
-                    st.markdown("---")
-                    st.subheader("Agent Response")
-                    # Display the response
-                    st.info(agent_response)
-        except Exception as e:
-            st.error(f"An error occurred while processing the file: {e}")
-            st.warning("Please ensure your CSV file is correctly formatted.")
-    else:
-        st.info("Awaiting CSV file upload.")
-if __name__ == "__main__":
-    main()

 import streamlit as st
 import pandas as pd
+# Import the helper functions, including the new guide generator
+from utils import perform_eda, query_agent, generate_eda_guide
+import os
+# --- Page Configuration: wide layout and a custom page title ---
+st.set_page_config(layout="wide", page_title="NL Data Analyst Agent")
+# --- Streamlit UI Setup: main-area heading and one-line description ---
+st.title("📊 Natural Language Data Analyst")
+st.markdown("Upload a CSV file and ask the Mistral-powered agent questions about your data.")
+# --- Sidebar for Instructions and LLM-Generated EDA Steps ---
+# Static three-step usage instructions, rendered as Markdown under an "Instructions" header.
+st.sidebar.header("Instructions")
+st.sidebar.markdown(
+    """
+    1. **Upload** your CSV file below.
+    2. The application will immediately run a **preliminary EDA**.
+    3. Use the **Query Box** in the main area to ask detailed questions about your data.
+    """
 )
+# Use caching so the LLM guide is only generated once at startup
+@st.cache_data(show_spinner="Generating LLM-Powered EDA Guide...")
+def get_eda_guide():
+    """Wrapper function to cache the result of the LLM guide generation."""
+    try:
+        return generate_eda_guide()
+    except Exception:
+        return "Could not generate guide. Check LLM setup."
+# Put the LLM-generated guide inside an expander
+with st.sidebar.expander("📚 **LLM-Powered EDA Guide**", expanded=True):
+    guide_content = get_eda_guide()
+    st.markdown(guide_content)
+st.sidebar.info("Remember: The agent executes Python code. Be clear and specific with your requests, especially for plotting and complex analysis.")
+# --- File Uploader ---
+uploaded_file = st.file_uploader("Upload CSV File", type="csv")
+if uploaded_file is not None:
+    # Read the file and store it in session state
+    @st.cache_data
+    def load_data(file):
+        try:
+            # FIX: Changed delimiter to semicolon (;) and skip bad lines
+            df = pd.read_csv(file, sep=';', on_bad_lines='skip')
+            return df
+        except Exception as e:
+            st.error(f"Error reading CSV: {e}")
+            st.error("Try checking the file delimiter or encoding.")
+            return None
+    df = load_data(uploaded_file)
+    if df is not None:
+        st.session_state.df = df
+        st.subheader("Uploaded Data Preview")
+        st.dataframe(df.head())
+        # Perform and display the initial EDA
+        eda_markdown = perform_eda(df)
+        with st.expander("Preliminary EDA Results (Click to View Structure, Types, and Quality Checks)", expanded=True):
+            st.markdown(eda_markdown)
+        # --- Query Interface ---
+        st.subheader("Ask Your Data Analyst Agent")
+        query = st.text_area(
+            "Enter your natural language query:",
+            placeholder="e.g., What are the mean and standard deviation of the 'salary' column? Or: Show me a scatter plot of 'age' vs 'income'."
+        )
+        if st.button("Analyze Data"):
+            if query:
+                response = ""
+                with st.spinner("Agent is analyzing the data..."):
+                    # Call the LangChain agent
+                    response = query_agent(df, query)
+                st.markdown("---")
+                st.subheader("Agent Response")
+                # --- NEW LOGIC FOR FORMATTING OUTPUT ---
+                # 1. Check if the response contains a plot (Streamlit handles plot display automatically)
+                if "plot" in query.lower() or "chart" in query.lower() or "graph" in query.lower():
+                    st.success("Plot generated successfully (displayed below).")
+                    st.text_area("Agent's Explanation:", value=response, height=100)
+                else:
+                    # 2. Try to parse the response as a DataFrame for clean display
+                    is_dataframe = False
+                    # Common Pandas string representations start with indices or alignment markers
+                    if response.strip().startswith(('   ', '0', '1', '2', 'Index', 'Name')) or '\n' in response:
+                        try:
+                            # Use StringIO to read the response string as if it were a file
+                            from io import StringIO
+                            temp_df = pd.read_csv(StringIO(response.strip()), sep='\s\s+', engine='python')
+                            # A successful parse results in a table, but it might not be the intended data.
+                            # We check if the parsed table has a reasonable number of columns (> 1)
+                            if len(temp_df.columns) > 1:
+                                st.dataframe(temp_df)
+                                is_dataframe = True
+                        except Exception:
+                            # If parsing fails, it's not a clean DataFrame output, so treat it as text.
+                            pass
+                    # 3. If it wasn't a plot and we couldn't parse a clean DataFrame, display as informational text.
+                    if not is_dataframe:
+                        st.info(response)
+            else:
+                st.warning("Please enter a query to analyze the data.")
+else:
+    st.info("Awaiting CSV file upload.")
+# Add a note about the required API key for deployment
+if 'HUGGINGFACEHUB_API_TOKEN' not in st.session_state and 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
+    st.sidebar.error("HuggingFace API Token is missing. Set the `HUGGINGFACEHUB_API_TOKEN` environment variable or Streamlit Secret.")