LeannJoy commited on
Commit
dfbae8c
·
verified ·
1 Parent(s): 55d2921

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -80
app.py CHANGED
@@ -1,91 +1,125 @@
1
  import streamlit as st
2
  import pandas as pd
3
- from utils import perform_eda, query_agent
4
- import io
 
5
 
6
- # --- Page Configuration ---
7
- st.set_page_config(
8
- page_title="NL Data Analysis Agent",
9
- layout="wide",
10
- initial_sidebar_state="expanded"
 
 
 
 
 
 
 
 
 
11
  )
12
 
13
- def main():
14
- """Main function to run the Streamlit application."""
15
-
16
- st.title("๐Ÿ“Š Natural Language Data Analysis Agent")
17
- st.caption("Upload a CSV file and ask questions about your data using plain English.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- # --- Sidebar for Instructions ---
20
- with st.sidebar:
21
- st.header("Instructions")
22
- st.markdown("""
23
- 1. **Upload** your CSV file in the main area.
24
- 2. **Review** the automatic Exploratory Data Analysis (EDA) summary.
25
- 3. **Ask** your question in the text box. The system uses a LangChain Agent to run Python/Pandas code on your data and provide an answer.
26
 
27
- **Examples of Queries:**
28
- - "What is the average income?"
29
- - "Show a histogram of the 'Age' column."
30
- - "Calculate the correlation matrix and identify the strongest relationship."
31
- - "Perform a bivariate analysis of 'Gender' and 'Purchase_Amount'."
32
- """)
33
- st.warning("โš ๏ธ **Note on LLM:** This app uses the Gemini API via LangChain. For production use with a truly open-source LLM, you would need to replace the LLM initialization in `utils.py` with a compatible self-hosted or endpoint-based model.")
34
 
35
- # --- File Uploader ---
36
- uploaded_file = st.file_uploader(
37
- "Upload a CSV file",
38
- type="csv",
39
- help="The CSV file should be structured with headers."
40
- )
41
-
42
- if uploaded_file is not None:
43
- try:
44
- # Read the CSV file into a Pandas DataFrame
45
- # We use StringIO to ensure compatibility across environments
46
- uploaded_file.seek(0)
47
- data = uploaded_file.read()
48
- df = pd.read_csv(io.StringIO(data.decode('utf-8')))
49
-
50
- st.success("CSV file successfully loaded!")
51
-
52
- # --- Data Preview ---
53
- st.subheader("Data Preview")
54
- st.dataframe(df.head())
55
-
56
- # --- Automated EDA ---
57
- with st.expander("๐Ÿ”ฌ Automated Exploratory Data Analysis (EDA)", expanded=False):
58
- eda_summary = perform_eda(df)
59
- st.markdown(eda_summary)
60
 
61
- # --- Natural Language Query Interface ---
62
- st.subheader("Ask a Question about the Data")
63
-
64
- # Text input for the user's query
65
- user_query = st.text_area(
66
- "Enter your data question (e.g., 'Plot the distribution of Age' or 'What is the median salary?'):",
67
- placeholder="Ask me anything about your data...",
68
- key="user_query_input"
69
- )
70
-
71
- # Button to trigger the agent
72
- if st.button("Run Analysis", use_container_width=True, type="primary") and user_query:
73
- with st.spinner(f"Analyzing data with LangChain Agent..."):
74
- # Call the agent function from utils.py
75
- agent_response = query_agent(df, user_query)
76
-
77
- st.success("Analysis Complete!")
78
- st.markdown("---")
79
- st.subheader("Agent Response")
80
- # Display the response
81
- st.info(agent_response)
82
 
83
- except Exception as e:
84
- st.error(f"An error occurred while processing the file: {e}")
85
- st.warning("Please ensure your CSV file is correctly formatted.")
86
-
87
- else:
88
- st.info("Awaiting CSV file upload.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
- if __name__ == "__main__":
91
- main()
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ # Import the helper functions, including the new guide generator
4
+ from utils import perform_eda, query_agent, generate_eda_guide
5
+ import os
6
 
7
st.set_page_config(layout="wide", page_title="NL Data Analyst Agent")

# --- Streamlit UI Setup ---
# FIX: the title emoji had been corrupted into mojibake ("๐Ÿ“Š") by a
# UTF-8/CP-1252 encoding round-trip; restored to the intended 📊 glyph.
st.title("📊 Natural Language Data Analyst")
st.markdown("Upload a CSV file and ask the Mistral-powered agent questions about your data.")

# --- Sidebar for Instructions and LLM-Generated EDA Steps ---
st.sidebar.header("Instructions")
st.sidebar.markdown(
    """
    1. **Upload** your CSV file below.
    2. The application will immediately run a **preliminary EDA**.
    3. Use the **Query Box** in the main area to ask detailed questions about your data.
    """
)
22
 
23
# Cache the guide so the LLM is only queried once per session, not on every rerun.
@st.cache_data(show_spinner="Generating LLM-Powered EDA Guide...")
def get_eda_guide():
    """Return the LLM-generated EDA guide, cached across Streamlit reruns.

    Falls back to a short notice string when the LLM backend raises,
    so the sidebar always has something to render.
    """
    try:
        guide = generate_eda_guide()
    except Exception:
        guide = "Could not generate guide. Check LLM setup."
    return guide
31
+
32
# Put the LLM-generated guide inside an expander (open by default).
# FIX: the expander label emoji was mojibake ("๐Ÿ“š") from an encoding
# round-trip; restored to the intended 📚 glyph.
with st.sidebar.expander("📚 **LLM-Powered EDA Guide**", expanded=True):
    guide_content = get_eda_guide()
    st.markdown(guide_content)

st.sidebar.info("Remember: The agent executes Python code. Be clear and specific with your requests, especially for plotting and complex analysis.")

# --- File Uploader ---
uploaded_file = st.file_uploader("Upload CSV File", type="csv")
41
+
42
if uploaded_file is not None:
    # Cache the parse so widget interactions (which rerun the script) don't
    # re-read and re-parse the same upload.
    @st.cache_data
    def load_data(file):
        """Parse the uploaded CSV into a DataFrame; return None on failure."""
        try:
            # FIX: sep=None lets pandas sniff the delimiter (comma, semicolon,
            # tab, ...) instead of the previous hard-coded ';', which silently
            # collapsed ordinary comma-separated files into a single column.
            # Malformed rows are skipped rather than aborting the load.
            return pd.read_csv(file, sep=None, engine='python', on_bad_lines='skip')
        except Exception as e:
            st.error(f"Error reading CSV: {e}")
            st.error("Try checking the file delimiter or encoding.")
            return None

    df = load_data(uploaded_file)

    if df is not None:
        st.session_state.df = df

        st.subheader("Uploaded Data Preview")
        st.dataframe(df.head())

        # Perform and display the initial EDA
        eda_markdown = perform_eda(df)
        with st.expander("Preliminary EDA Results (Click to View Structure, Types, and Quality Checks)", expanded=True):
            st.markdown(eda_markdown)

        # --- Query Interface ---
        st.subheader("Ask Your Data Analyst Agent")
        query = st.text_area(
            "Enter your natural language query:",
            placeholder="e.g., What are the mean and standard deviation of the 'salary' column? Or: Show me a scatter plot of 'age' vs 'income'."
        )

        if st.button("Analyze Data"):
            if query:
                with st.spinner("Agent is analyzing the data..."):
                    # Call the LangChain agent
                    response = query_agent(df, query)

                st.markdown("---")
                st.subheader("Agent Response")

                # --- Output formatting ---
                # 1. Plot requests: the agent's executed code renders the figure
                #    as a side effect, so only the explanation text is shown here.
                #    (Keyword heuristic on the *query*, as before.)
                if any(word in query.lower() for word in ("plot", "chart", "graph")):
                    st.success("Plot generated successfully (displayed below).")
                    st.text_area("Agent's Explanation:", value=response, height=100)
                else:
                    # 2. Heuristic: render the response as a table when it looks
                    #    like a printed DataFrame.
                    is_dataframe = False
                    stripped = response.strip()

                    # FIX: the leading-space probe must look at the UNstripped
                    # response — the original tested strip().startswith(' '),
                    # which can never match after strip().
                    looks_tabular = (
                        response.startswith(' ')
                        or stripped.startswith(('0', '1', '2', 'Index', 'Name'))
                        or '\n' in response
                    )
                    if looks_tabular:
                        try:
                            # Read the response string as if it were a file.
                            from io import StringIO
                            # FIX: raw string for the regex separator — '\s' in
                            # a plain literal is an invalid escape sequence
                            # (DeprecationWarning on modern Python).
                            temp_df = pd.read_csv(StringIO(stripped), sep=r'\s\s+', engine='python')

                            # Require more than one column so arbitrary prose is
                            # not misrendered as a one-column "table".
                            if len(temp_df.columns) > 1:
                                st.dataframe(temp_df)
                                is_dataframe = True
                        except Exception:
                            # Not a clean DataFrame dump; fall through to text.
                            pass

                    # 3. Neither plot nor parseable table: plain informational text.
                    if not is_dataframe:
                        st.info(response)
            else:
                st.warning("Please enter a query to analyze the data.")

else:
    st.info("Awaiting CSV file upload.")
122
 
123
# Surface a clear deployment error in the sidebar when the HuggingFace token
# is configured neither in Streamlit session state nor in the environment.
token_key = 'HUGGINGFACEHUB_API_TOKEN'
if token_key not in st.session_state and token_key not in os.environ:
    st.sidebar.error("HuggingFace API Token is missing. Set the `HUGGINGFACEHUB_API_TOKEN` environment variable or Streamlit Secret.")