Spaces:

LeannJoy
/

DataAnalysisApp

Sleeping

App Files Files Community

LeannJoy committed on Oct 17, 2025

Commit

6e2134f

verified ·

1 Parent(s): 3b29588

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -116

app.py CHANGED Viewed

@@ -1,135 +1,103 @@
 import streamlit as st
-import pandas as pd
-# Import the helper functions, including the new guide generator
-from utils import perform_eda, query_agent, generate_eda_guide
 import os
-# Import StringIO for the custom DataFrame parsing logic
-from io import StringIO # Keep this import, which is also used later in app.py
-st.set_page_config(layout="wide", page_title="NL Data Analyst Agent")
-# --- Streamlit UI Setup ---
-st.title("📊 Natural Language Data Analyst")
-st.markdown("Upload a CSV file and ask the Mistral-powered agent questions about your data.")
-# --- Sidebar for Instructions and LLM-Generated EDA Steps ---
-st.sidebar.header("Instructions")
-st.sidebar.markdown(
-    """
-    1. **Upload** your CSV file below.
-    2. The application will immediately run a **preliminary EDA**.
-    3. Use the **Query Box** in the main area to ask detailed questions about your data.
-    """
-)
-# Use caching so the LLM guide is only generated once at startup
-@st.cache_data(show_spinner="Generating LLM-Powered EDA Guide...")
-def get_eda_guide():
-    """Wrapper function to cache the result of the LLM guide generation."""
     try:
-        return generate_eda_guide()
-    except Exception:
-        return "Could not generate guide. Check LLM setup."
-# Put the LLM-generated guide inside an expander
-with st.sidebar.expander("📚 **LLM-Powered EDA Guide**", expanded=True):
-    guide_content = get_eda_guide()
-    st.markdown(guide_content)
-st.sidebar.info("Remember: The agent executes Python code. Be clear and specific with your requests, especially for plotting and complex analysis.")
-# --- File Uploader ---
-uploaded_file = st.file_uploader("Upload CSV File", type="csv")
-if uploaded_file is not None:
-    # Read the file and store it in session state
-    @st.cache_data
-    def load_data(file):
-        try:
-            # FIX: Use a regex pattern for separation to handle common delimiters (comma, tab, semicolon, colon)
-            # The 'python' engine is required for regex separators.
-            df = pd.read_csv(file, sep='[,\t;:]', engine='python', on_bad_lines='skip')
-            # Additional check: If only one column is read, it likely failed. Try standard comma again as a fallback.
-            if len(df.columns) <= 1:
-                file.seek(0) # Reset file pointer
-                df = pd.read_csv(file, on_bad_lines='skip')
-                # If still only one column, there is a fundamental issue with the file
-                if len(df.columns) <= 1:
-                    raise ValueError("Failed to auto-detect delimiter. Data is in a single column.")
-            return df
-        except Exception as e:
-            st.error(f"Error reading CSV: {e}")
-            st.error("Attempted to auto-detect delimiter (comma, semicolon, tab, colon) but failed. Please ensure the file is valid and properly delimited.")
-            return None
-    df = load_data(uploaded_file)
-    if df is not None:
-        st.session_state.df = df
-        st.subheader("Uploaded Data Preview")
-        st.dataframe(df.head())
-        # Perform and display the initial EDA
-        eda_markdown = perform_eda(df)
-        with st.expander("Preliminary EDA Results (Click to View Structure, Types, and Quality Checks)", expanded=True):
-            st.markdown(eda_markdown)
-        # --- Query Interface ---
-        st.subheader("Ask Your Data Analyst Agent")
-        query = st.text_area(
-            "Enter your natural language query:",
-            placeholder="e.g., What are the mean and standard deviation of the 'salary' column? Or: Show me a scatter plot of 'age' vs 'income'."
         )
-        if st.button("Analyze Data"):
-            if query:
-                response = ""
-                with st.spinner("Agent is analyzing the data..."):
-                    # Call the LangChain agent
-                    response = query_agent(df, query)
-                st.markdown("---")
-                st.subheader("Agent Response")
-                # --- NEW LOGIC FOR FORMATTING OUTPUT ---
-                # 1. Check if the response contains a plot (Streamlit handles plot display automatically)
-                if "plot" in query.lower() or "chart" in query.lower() or "graph" in query.lower():
-                    st.success("Plot generated successfully (displayed below).")
-                    st.text_area("Agent's Explanation:", value=response, height=100)
-                else:
-                    # 2. Try to parse the response as a DataFrame for clean display
-                    is_dataframe = False
-                    # Common Pandas string representations start with indices or alignment markers
-                    if response.strip().startswith(('    ', '0', '1', '2', 'Index', 'Name')) or '\n' in response:
-                        try:
-                            # Use StringIO to read the response string as if it were a file
-                            # The import is already here or should be added: from io import StringIO
-                            temp_df = pd.read_csv(StringIO(response.strip()), sep='\s\s+', engine='python')
-                            if len(temp_df.columns) > 1:
-                                st.dataframe(temp_df)
-                                is_dataframe = True
-                        except Exception:
-                            # If parsing fails, it's not a clean DataFrame output, so treat it as text.
-                            pass
-                    # 3. If it wasn't a plot and we couldn't parse a clean DataFrame, display as informational text.
-                    if not is_dataframe:
-                        st.info(response)
-            else:
-                st.warning("Please enter a query to analyze the data.")
-else:
-    st.info("Awaiting CSV file upload.")
-# Add a note about the required API key for deployment
-if 'HUGGINGFACEHUB_API_TOKEN' not in st.session_state and 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
-    st.sidebar.error("HuggingFace API Token is missing. Set the `HUGGINGFACEHUB_API_TOKEN` environment variable or Streamlit Secret.")

 import streamlit as st
+from dotenv import load_dotenv
 import os
+from utils import query_agent
+import io
+# Load environment variables (like HUGGINGFACEHUB_API_TOKEN) from .env
+load_dotenv()
def display_data_preview(uploaded_file_bytes: io.BytesIO) -> bool:
    """Render a five-row preview of the uploaded CSV on the Streamlit page.

    The delimiter comes from ``utils.detect_csv_delimiter``. The buffer is
    rewound before parsing and again afterwards so the caller can pass the
    same buffer on to the agent.

    Args:
        uploaded_file_bytes: Seekable in-memory buffer holding the raw CSV.

    Returns:
        True when the preview was rendered; False when delimiter detection or
        parsing raised, in which case an error box is shown instead.
    """
    # Import pandas locally instead of relying on the ``pd`` global that only
    # exists when the ``__main__`` guard has run; otherwise importing this
    # module elsewhere turns every preview into a swallowed NameError.
    import pandas as pd

    try:
        # Delimiter detection is delegated to the project's utils module.
        from utils import detect_csv_delimiter
        delimiter = detect_csv_delimiter(uploaded_file_bytes)

        # Rewind before parsing (presumably detection advances the stream).
        uploaded_file_bytes.seek(0)
        df_preview = pd.read_csv(uploaded_file_bytes, sep=delimiter, nrows=5)

        st.subheader("Data Preview (First 5 Rows)")
        st.dataframe(df_preview, use_container_width=True)
        st.caption(f"File loaded using detected delimiter: `{delimiter}`")

        # Leave the buffer at the start for whoever reads it next.
        uploaded_file_bytes.seek(0)
        return True
    except Exception as e:
        # Broad catch is deliberate: any failure is reported in the UI
        # rather than crashing the page.
        st.error(f"Failed to read or preview the CSV file. Please ensure it is a valid CSV format. Error: {e}")
        return False
+# --- Main Streamlit Application ---
def main():
    """Build the page, collect a CSV and a question, and show the agent's answer.

    Returns early, after rendering the relevant message, when the
    ``HUGGINGFACEHUB_API_TOKEN`` environment variable is unset or empty, when
    no file has been uploaded, when the preview fails, or when the query box
    is empty.
    """
    st.set_page_config(
        layout="wide",
        page_title="NL Data Analyst (LLM Powered)",
        initial_sidebar_state="expanded",
    )
    st.title("📊 Natural Language Data Analyst")
    st.markdown("Upload a CSV file and ask questions about your data using plain English.")

    # Without the token there is nothing useful to render, so stop here.
    api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not api_token:
        st.error("🚨 HUGGINGFACEHUB_API_TOKEN not found. Please ensure your `.env` file is set up correctly.")
        return

    # Sidebar: file picker followed by a few sample questions.
    with st.sidebar:
        st.header("1. Upload CSV File")
        csv_upload = st.file_uploader(
            "Choose a CSV file",
            help="The app automatically detects the delimiter (comma, semicolon, etc.).",
            type=["csv"],
        )
        st.markdown("---")
        st.subheader("2. Example Queries")
        st.info(
            """
            - What is the average value of the 'Sales' column?
            - Show me the top 5 highest 'Profit' rows.
            - How many unique values are there in 'Customer Name'?
            - Plot a histogram of the 'Age' column.
            """
        )

    if csv_upload is None:
        st.info("Please upload a CSV file in the sidebar to begin the data analysis.")
        return

    # Copy the upload into a fresh in-memory buffer so it can be read twice:
    # once for the preview and once by the agent.
    csv_buffer = io.BytesIO(csv_upload.getvalue())
    preview_ok = display_data_preview(csv_buffer)
    if not preview_ok:
        return

    st.subheader("3. Ask Your Data Question")
    question = st.text_input(
        "Enter your query here (e.g., 'What is the sum of the Price column?'):",
        placeholder="E.g., What is the average customer age?",
        key="user_input_query",
    )
    if not question:
        return

    with st.spinner("🤖 Analyzing data with LangChain Agent..."):
        try:
            answer = query_agent(csv_buffer, question)
            st.success("Analysis Complete!")
            st.markdown("### Answer")
            st.write(answer)
        except Exception as exc:
            # Any failure while querying or rendering is shown in the page.
            st.error(f"An unexpected error occurred: {exc}")


if __name__ == "__main__":
    # Importing pandas here binds `pd` as a module-level name before main() runs.
    import pandas as pd
    main()