LeannJoy committed on
Commit
6e2134f
·
verified ·
1 Parent(s): 3b29588

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -116
app.py CHANGED
@@ -1,135 +1,103 @@
1
  import streamlit as st
2
- import pandas as pd
3
- # Import the helper functions, including the new guide generator
4
- from utils import perform_eda, query_agent, generate_eda_guide
5
  import os
6
- # Import StringIO for the custom DataFrame parsing logic
7
- from io import StringIO # Keep this import, which is also used later in app.py
8
 
9
- st.set_page_config(layout="wide", page_title="NL Data Analyst Agent")
 
10
 
11
- # --- Streamlit UI Setup ---
12
- st.title("📊 Natural Language Data Analyst")
13
- st.markdown("Upload a CSV file and ask the Mistral-powered agent questions about your data.")
14
-
15
- # --- Sidebar for Instructions and LLM-Generated EDA Steps ---
16
- st.sidebar.header("Instructions")
17
- st.sidebar.markdown(
18
- """
19
- 1. **Upload** your CSV file below.
20
- 2. The application will immediately run a **preliminary EDA**.
21
- 3. Use the **Query Box** in the main area to ask detailed questions about your data.
22
- """
23
- )
24
-
25
- # Use caching so the LLM guide is only generated once at startup
26
- @st.cache_data(show_spinner="Generating LLM-Powered EDA Guide...")
27
- def get_eda_guide():
28
- """Wrapper function to cache the result of the LLM guide generation."""
29
  try:
30
- return generate_eda_guide()
31
- except Exception:
32
- return "Could not generate guide. Check LLM setup."
33
-
34
- # Put the LLM-generated guide inside an expander
35
- with st.sidebar.expander("📚 **LLM-Powered EDA Guide**", expanded=True):
36
- guide_content = get_eda_guide()
37
- st.markdown(guide_content)
 
 
 
38
 
39
- st.sidebar.info("Remember: The agent executes Python code. Be clear and specific with your requests, especially for plotting and complex analysis.")
 
 
 
 
 
40
 
41
- # --- File Uploader ---
42
- uploaded_file = st.file_uploader("Upload CSV File", type="csv")
 
 
 
 
 
 
43
 
44
- if uploaded_file is not None:
45
- # Read the file and store it in session state
46
- @st.cache_data
47
- def load_data(file):
48
- try:
49
- # FIX: Use a regex pattern for separation to handle common delimiters (comma, tab, semicolon, colon)
50
- # The 'python' engine is required for regex separators.
51
- df = pd.read_csv(file, sep='[,\t;:]', engine='python', on_bad_lines='skip')
52
-
53
- # Additional check: If only one column is read, it likely failed. Try standard comma again as a fallback.
54
- if len(df.columns) <= 1:
55
- file.seek(0) # Reset file pointer
56
- df = pd.read_csv(file, on_bad_lines='skip')
57
-
58
- # If still only one column, there is a fundamental issue with the file
59
- if len(df.columns) <= 1:
60
- raise ValueError("Failed to auto-detect delimiter. Data is in a single column.")
61
-
62
- return df
63
- except Exception as e:
64
- st.error(f"Error reading CSV: {e}")
65
- st.error("Attempted to auto-detect delimiter (comma, semicolon, tab, colon) but failed. Please ensure the file is valid and properly delimited.")
66
- return None
67
 
68
- df = load_data(uploaded_file)
 
 
 
69
 
70
- if df is not None:
71
- st.session_state.df = df
 
 
 
 
 
 
72
 
73
- st.subheader("Uploaded Data Preview")
74
- st.dataframe(df.head())
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- # Perform and display the initial EDA
77
- eda_markdown = perform_eda(df)
78
- with st.expander("Preliminary EDA Results (Click to View Structure, Types, and Quality Checks)", expanded=True):
79
- st.markdown(eda_markdown)
80
 
81
- # --- Query Interface ---
82
- st.subheader("Ask Your Data Analyst Agent")
83
- query = st.text_area(
84
- "Enter your natural language query:",
85
- placeholder="e.g., What are the mean and standard deviation of the 'salary' column? Or: Show me a scatter plot of 'age' vs 'income'."
86
  )
87
 
88
- if st.button("Analyze Data"):
89
- if query:
90
- response = ""
91
- with st.spinner("Agent is analyzing the data..."):
92
- # Call the LangChain agent
93
- response = query_agent(df, query)
94
-
95
- st.markdown("---")
96
- st.subheader("Agent Response")
97
-
98
- # --- NEW LOGIC FOR FORMATTING OUTPUT ---
99
-
100
- # 1. Check if the response contains a plot (Streamlit handles plot display automatically)
101
- if "plot" in query.lower() or "chart" in query.lower() or "graph" in query.lower():
102
- st.success("Plot generated successfully (displayed below).")
103
- st.text_area("Agent's Explanation:", value=response, height=100)
104
 
105
- else:
106
- # 2. Try to parse the response as a DataFrame for clean display
107
- is_dataframe = False
108
 
109
- # Common Pandas string representations start with indices or alignment markers
110
- if response.strip().startswith((' ', '0', '1', '2', 'Index', 'Name')) or '\n' in response:
111
- try:
112
- # Use StringIO to read the response string as if it were a file
113
- # The import is already here or should be added: from io import StringIO
114
- temp_df = pd.read_csv(StringIO(response.strip()), sep='\s\s+', engine='python')
115
- if len(temp_df.columns) > 1:
116
- st.dataframe(temp_df)
117
- is_dataframe = True
118
-
119
- except Exception:
120
- # If parsing fails, it's not a clean DataFrame output, so treat it as text.
121
- pass
122
-
123
- # 3. If it wasn't a plot and we couldn't parse a clean DataFrame, display as informational text.
124
- if not is_dataframe:
125
- st.info(response)
126
-
127
- else:
128
- st.warning("Please enter a query to analyze the data.")
129
 
130
- else:
131
- st.info("Awaiting CSV file upload.")
132
 
133
- # Add a note about the required API key for deployment
134
- if 'HUGGINGFACEHUB_API_TOKEN' not in st.session_state and 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
135
- st.sidebar.error("HuggingFace API Token is missing. Set the `HUGGINGFACEHUB_API_TOKEN` environment variable or Streamlit Secret.")
 
1
  import streamlit as st
2
+ from dotenv import load_dotenv
 
 
3
  import os
4
+ from utils import query_agent
5
+ import io
6
 
7
+ # Load environment variables (like HUGGINGFACEHUB_API_TOKEN) from .env
8
+ load_dotenv()
9
 
10
+ def display_data_preview(uploaded_file_bytes):
11
+ """Utility function to display a preview of the uploaded data."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  try:
13
+ # Use utils' logic to detect delimiter
14
+ from utils import detect_csv_delimiter
15
+ delimiter = detect_csv_delimiter(uploaded_file_bytes)
16
+
17
+ # Reset stream and load for preview
18
+ uploaded_file_bytes.seek(0)
19
+ df_preview = pd.read_csv(uploaded_file_bytes, sep=delimiter, nrows=5)
20
+
21
+ st.subheader("Data Preview (First 5 Rows)")
22
+ st.dataframe(df_preview, use_container_width=True)
23
+ st.caption(f"File loaded using detected delimiter: `{delimiter}`")
24
 
25
+ # Reset stream position again for the main agent function
26
+ uploaded_file_bytes.seek(0)
27
+ return True
28
+ except Exception as e:
29
+ st.error(f"Failed to read or preview the CSV file. Please ensure it is a valid CSV format. Error: {e}")
30
+ return False
31
 
32
+ # --- Main Streamlit Application ---
33
+ def main():
34
+ """Main function to run the Streamlit application."""
35
+ st.set_page_config(
36
+ page_title="NL Data Analyst (LLM Powered)",
37
+ layout="wide",
38
+ initial_sidebar_state="expanded"
39
+ )
40
 
41
+ st.title("📊 Natural Language Data Analyst")
42
+ st.markdown("Upload a CSV file and ask questions about your data using plain English.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ # Check for API key presence
45
+ if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
46
+ st.error("🚨 HUGGINGFACEHUB_API_TOKEN not found. Please ensure your `.env` file is set up correctly.")
47
+ return
48
 
49
+ # --- Sidebar for File Upload ---
50
+ with st.sidebar:
51
+ st.header("1. Upload CSV File")
52
+ uploaded_file = st.file_uploader(
53
+ "Choose a CSV file",
54
+ type=["csv"],
55
+ help="The app automatically detects the delimiter (comma, semicolon, etc.)."
56
+ )
57
 
58
+ st.markdown("---")
59
+ st.subheader("2. Example Queries")
60
+ st.info(
61
+ """
62
+ - What is the average value of the 'Sales' column?
63
+ - Show me the top 5 highest 'Profit' rows.
64
+ - How many unique values are there in 'Customer Name'?
65
+ - Plot a histogram of the 'Age' column.
66
+ """
67
+ )
68
+
69
+ # --- Main Content Area ---
70
+ if uploaded_file is not None:
71
+ # Convert uploaded file to BytesIO object for multiple reads
72
+ uploaded_file_bytes = io.BytesIO(uploaded_file.getvalue())
73
 
74
+ # Display file preview
75
+ if not display_data_preview(uploaded_file_bytes):
76
+ return # Stop execution if preview fails
 
77
 
78
+ st.subheader("3. Ask Your Data Question")
79
+ user_query = st.text_input(
80
+ "Enter your query here (e.g., 'What is the sum of the Price column?'):",
81
+ key="user_input_query",
82
+ placeholder="E.g., What is the average customer age?"
83
  )
84
 
85
+ if user_query:
86
+ with st.spinner("🤖 Analyzing data with LangChain Agent..."):
87
+ try:
88
+ # Call the core analysis function
89
+ result = query_agent(uploaded_file_bytes, user_query)
 
 
 
 
 
 
 
 
 
 
 
90
 
91
+ st.success("Analysis Complete!")
92
+ st.markdown("### Answer")
93
+ st.write(result)
94
 
95
+ except Exception as e:
96
+ st.error(f"An unexpected error occurred: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ else:
99
+ st.info("Please upload a CSV file in the sidebar to begin the data analysis.")
100
 
101
+ if __name__ == "__main__":
102
+ import pandas as pd
103
+ main()