Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,135 +1,103 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
|
| 3 |
-
# Import the helper functions, including the new guide generator
|
| 4 |
-
from utils import perform_eda, query_agent, generate_eda_guide
|
| 5 |
import os
|
| 6 |
-
|
| 7 |
-
|
| 8 |
|
| 9 |
-
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
st.markdown("Upload a CSV file and ask the Mistral-powered agent questions about your data.")
|
| 14 |
-
|
| 15 |
-
# --- Sidebar for Instructions and LLM-Generated EDA Steps ---
|
| 16 |
-
st.sidebar.header("Instructions")
|
| 17 |
-
st.sidebar.markdown(
|
| 18 |
-
"""
|
| 19 |
-
1. **Upload** your CSV file below.
|
| 20 |
-
2. The application will immediately run a **preliminary EDA**.
|
| 21 |
-
3. Use the **Query Box** in the main area to ask detailed questions about your data.
|
| 22 |
-
"""
|
| 23 |
-
)
|
| 24 |
-
|
| 25 |
-
# Use caching so the LLM guide is only generated once at startup
|
| 26 |
-
@st.cache_data(show_spinner="Generating LLM-Powered EDA Guide...")
|
| 27 |
-
def get_eda_guide():
|
| 28 |
-
"""Wrapper function to cache the result of the LLM guide generation."""
|
| 29 |
try:
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
#
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
-
# ---
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
@st.cache_data
def load_data(file):
    """Read an uploaded CSV into a DataFrame, auto-detecting the delimiter.

    Args:
        file: Seekable file-like object containing the CSV data.

    Returns:
        The parsed ``DataFrame``, or ``None`` when the file could not be
        read or still ends up as a single column. Failures are reported to
        the user through ``st.error``.
    """
    try:
        # sep=None lets the python engine sniff one delimiter for the file.
        # The earlier regex separator split on *every* comma, tab, semicolon
        # and colon -- including those inside quoted fields or inside values
        # such as "12:30" -- so valid rows were mis-split and then silently
        # dropped by on_bad_lines='skip'.
        df = pd.read_csv(file, sep=None, engine='python', on_bad_lines='skip')

        # A single column usually means detection failed; retry as a plain
        # comma-separated file before giving up.
        if len(df.columns) <= 1:
            file.seek(0)  # Reset file pointer
            df = pd.read_csv(file, on_bad_lines='skip')

            # If still only one column, there is a fundamental issue with the file
            if len(df.columns) <= 1:
                raise ValueError("Failed to auto-detect delimiter. Data is in a single column.")

        return df
    except Exception as e:
        # Top-level UI boundary: surface the problem instead of crashing the page.
        st.error(f"Error reading CSV: {e}")
        st.error("Attempted to auto-detect delimiter (comma, semicolon, tab, colon) but failed. Please ensure the file is valid and properly delimited.")
        return None
|
| 67 |
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
st.
|
| 74 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
-
#
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
st.markdown(eda_markdown)
|
| 80 |
|
| 81 |
-
|
| 82 |
-
st.
|
| 83 |
-
|
| 84 |
-
"
|
| 85 |
-
placeholder="
|
| 86 |
)
|
| 87 |
|
| 88 |
-
if
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
response = query_agent(df, query)
|
| 94 |
-
|
| 95 |
-
st.markdown("---")
|
| 96 |
-
st.subheader("Agent Response")
|
| 97 |
-
|
| 98 |
-
# --- NEW LOGIC FOR FORMATTING OUTPUT ---
|
| 99 |
-
|
| 100 |
-
# 1. Check if the response contains a plot (Streamlit handles plot display automatically)
|
| 101 |
-
if "plot" in query.lower() or "chart" in query.lower() or "graph" in query.lower():
|
| 102 |
-
st.success("Plot generated successfully (displayed below).")
|
| 103 |
-
st.text_area("Agent's Explanation:", value=response, height=100)
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
try:
|
| 112 |
-
# Use StringIO to read the response string as if it were a file
|
| 113 |
-
# The import is already here or should be added: from io import StringIO
|
| 114 |
-
temp_df = pd.read_csv(StringIO(response.strip()), sep='\s\s+', engine='python')
|
| 115 |
-
if len(temp_df.columns) > 1:
|
| 116 |
-
st.dataframe(temp_df)
|
| 117 |
-
is_dataframe = True
|
| 118 |
-
|
| 119 |
-
except Exception:
|
| 120 |
-
# If parsing fails, it's not a clean DataFrame output, so treat it as text.
|
| 121 |
-
pass
|
| 122 |
-
|
| 123 |
-
# 3. If it wasn't a plot and we couldn't parse a clean DataFrame, display as informational text.
|
| 124 |
-
if not is_dataframe:
|
| 125 |
-
st.info(response)
|
| 126 |
-
|
| 127 |
-
else:
|
| 128 |
-
st.warning("Please enter a query to analyze the data.")
|
| 129 |
|
| 130 |
-
else:
|
| 131 |
-
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
from dotenv import load_dotenv
|
|
|
|
|
|
|
| 3 |
import os
|
| 4 |
+
from utils import query_agent
|
| 5 |
+
import io
|
| 6 |
|
| 7 |
+
# Load environment variables (like HUGGINGFACEHUB_API_TOKEN) from .env
|
| 8 |
+
load_dotenv()
|
| 9 |
|
| 10 |
+
def display_data_preview(uploaded_file_bytes):
    """Render a five-row preview of the uploaded CSV on the Streamlit page.

    The delimiter comes from ``utils.detect_csv_delimiter``. The stream is
    rewound to position 0 before parsing and again before returning, so the
    caller can hand the same object to another reader afterwards.

    Args:
        uploaded_file_bytes: Seekable binary stream holding the CSV data
            (the caller passes an ``io.BytesIO``).

    Returns:
        True when the preview was rendered; False when any step failed, in
        which case the error is shown via ``st.error``.
    """
    # Imported locally because the module only imports pandas under its
    # ``__main__`` guard; without this, ``pd`` is undefined whenever the
    # module is imported instead of being executed as a script, and the
    # broad ``except`` below would misreport that as an invalid CSV.
    import pandas as pd

    try:
        # Use utils' logic to detect delimiter
        from utils import detect_csv_delimiter
        delimiter = detect_csv_delimiter(uploaded_file_bytes)

        # Reset stream and load for preview
        uploaded_file_bytes.seek(0)
        df_preview = pd.read_csv(uploaded_file_bytes, sep=delimiter, nrows=5)

        st.subheader("Data Preview (First 5 Rows)")
        st.dataframe(df_preview, use_container_width=True)
        st.caption(f"File loaded using detected delimiter: `{delimiter}`")

        # Reset stream position again for the main agent function
        uploaded_file_bytes.seek(0)
        return True
    except Exception as e:
        # UI boundary: report the failure and let the caller stop gracefully.
        st.error(f"Failed to read or preview the CSV file. Please ensure it is a valid CSV format. Error: {e}")
        return False
|
| 31 |
|
| 32 |
+
# --- Main Streamlit Application ---
|
| 33 |
+
def main():
    """Drive the Streamlit page from configuration to answer display.

    Sets the page configuration and title, stops with an error when
    ``HUGGINGFACEHUB_API_TOKEN`` is not set, renders the sidebar (CSV
    uploader and example queries) and, once a file has been uploaded and
    previewed, passes the user's question to ``query_agent`` and writes the
    result to the page.
    """
    st.set_page_config(
        page_title="NL Data Analyst (LLM Powered)",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    st.title("π Natural Language Data Analyst")
    st.markdown("Upload a CSV file and ask questions about your data using plain English.")

    # The page stops here when the API token is missing.
    api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not api_token:
        st.error("π¨ HUGGINGFACEHUB_API_TOKEN not found. Please ensure your `.env` file is set up correctly.")
        return

    # Sidebar: file uploader followed by a few sample questions.
    with st.sidebar:
        st.header("1. Upload CSV File")
        uploaded_file = st.file_uploader(
            "Choose a CSV file",
            type=["csv"],
            help="The app automatically detects the delimiter (comma, semicolon, etc.).",
        )

        st.markdown("---")
        st.subheader("2. Example Queries")
        st.info(
            """
            - What is the average value of the 'Sales' column?
            - Show me the top 5 highest 'Profit' rows.
            - How many unique values are there in 'Customer Name'?
            - Plot a histogram of the 'Age' column.
            """
        )

    # Main area: without an upload there is only a hint to show.
    if uploaded_file is None:
        st.info("Please upload a CSV file in the sidebar to begin the data analysis.")
        return

    # Copy the upload into a BytesIO so it can be read more than once.
    csv_stream = io.BytesIO(uploaded_file.getvalue())

    preview_ok = display_data_preview(csv_stream)
    if not preview_ok:
        return  # Preview failed; the helper has already reported the error.

    st.subheader("3. Ask Your Data Question")
    user_query = st.text_input(
        "Enter your query here (e.g., 'What is the sum of the Price column?'):",
        key="user_input_query",
        placeholder="E.g., What is the average customer age?",
    )

    if not user_query:
        return

    with st.spinner("π€ Analyzing data with LangChain Agent..."):
        try:
            # Hand the stream and the question to the analysis helper.
            result = query_agent(csv_stream, user_query)

            st.success("Analysis Complete!")
            st.markdown("### Answer")
            st.write(result)
        except Exception as exc:
            st.error(f"An unexpected error occurred: {exc}")
|
| 100 |
|
| 101 |
+
if __name__ == "__main__":
|
| 102 |
+
import pandas as pd
|
| 103 |
+
main()
|