csv-redaer-bot / app.py
charesz's picture
Update app.py
7c51375 verified
import os
import streamlit as st
from io import BytesIO
import pandas as pd
from utils import query_agent_from_csv
# Page configuration and heading. The labels below were mis-decoded
# (UTF-8 bytes read through a Greek code page); they are restored to the
# intended em dash and emoji so the UI no longer shows garbage characters.
st.set_page_config(page_title="CSV Q&A — HuggingFace Mistral", layout="wide")
st.title("📊 CSV Q&A — Ask questions about your dataset")

# Sidebar: the repository id typed here is later handed to
# query_agent_from_csv together with each question.
st.sidebar.header("⚙️ Settings")
model_repo = st.sidebar.text_input(
    "Model Repo",
    value="mistralai/Mistral-7B-Instruct-v0.3",
)
# Seed every session-state key this script relies on. A key is only written
# when it is missing, so values set during earlier runs of the same session
# are preserved across Streamlit's script reruns.
for state_key, initial_value in (
    ("chat_history", []),
    ("dataset_summary", None),
    ("query_input", ""),
    ("should_clear", False),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# Apply a pending "clear the question box" request here, ahead of the text
# input that is bound to the "query_input" key, then drop the request flag.
if st.session_state.should_clear:
    st.session_state.should_clear = False
    st.session_state.query_input = ""
# File uploader: nothing below can run without a dataset, so halt this
# script run until the user provides one.
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
if not uploaded_file:
    st.info("Upload a CSV file to begin.")
    st.stop()

# getvalue() returns the complete upload regardless of the stream position,
# so a rerun never parses an already-consumed (empty) buffer.
raw_bytes = uploaded_file.getvalue()
file_bytes = BytesIO(raw_bytes)

# Load the dataset. A malformed, empty, or wrongly encoded file is reported
# to the user instead of crashing the app with a traceback.
try:
    df = pd.read_csv(file_bytes)
except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
    st.error(f"Could not read the uploaded file as CSV: {exc}")
    st.stop()

# Identify the upload by name and content so that replacing the file
# invalidates the cached summary; previously the summary of the first
# upload was reused for every later file. hash() of bytes is stable within
# one Python process, which is sufficient for a comparison across reruns.
# NOTE(review): chat_history is intentionally left untouched here; confirm
# whether it should also be reset when a different file is uploaded.
file_signature = (uploaded_file.name, hash(raw_bytes))
if (
    st.session_state.dataset_summary is None
    or st.session_state.get("dataset_signature") != file_signature
):
    try:
        stats = df.describe(include='all', datetime_is_numeric=True).to_string()
    except TypeError:
        # pandas versions that do not accept datetime_is_numeric raise
        # TypeError for the unknown keyword; retry without it.
        stats = df.describe(include='all').to_string()
    st.session_state.dataset_summary = {
        "columns": df.columns.tolist(),
        "dtypes": df.dtypes.astype(str).to_dict(),
        "stats": stats,
    }
    st.session_state["dataset_signature"] = file_signature
# Dataset preview: first rows of the uploaded data. The section labels were
# mis-decoded emoji and are restored here.
st.subheader("📂 Dataset Preview")
st.dataframe(df.head())

with st.expander("📊 Dataset Info"):
    st.write(f"**Rows:** {df.shape[0]}, **Columns:** {df.shape[1]}")
    # Render dtypes as plain strings, matching how the summary sent to the
    # model stores them, so the table holds text rather than dtype objects.
    st.write("**Column Types:**", df.dtypes.astype(str))
    st.write("**Missing Values:**", df.isnull().sum())

# Example questions the user can adapt to their own column names.
with st.expander("💡 Example Questions"):
    for example in (
        "- What is the average of column X?",
        "- Show summary statistics of the dataset.",
        "- Which column has the most missing values?",
        "- How many unique values are there in column Y?",
    ):
        st.write(example)
# Conversation history: replay every stored question/answer pair in order.
# The heading emoji was mis-decoded and is restored here.
st.write("### 💬 Conversation")
for turn in st.session_state.chat_history:
    with st.chat_message("user"):
        st.write(turn["question"])
    with st.chat_message("assistant"):
        st.write(turn["answer"])
# Input section: a wide question box next to a narrow send button.
# The text input is bound to the "query_input" session-state key so its
# content can be reset from session state before the widget is created.
col1, col2 = st.columns([10, 1])
with col1:
    user_query = st.text_input(
        "Type your question...",
        key="query_input",
        placeholder="Ask something about the dataset...",
        label_visibility="collapsed",
    )
with col2:
    # The button label was a mis-decoded emoji; restored to the intended one.
    send_clicked = st.button("📩")
# Send action
# Streamlit re-executes this script on every interaction, so any non-empty
# text left in the question box reaches this point again on each rerun.
query = user_query.strip()
attempt = (query, model_repo)

# A failed request leaves its text in the box. Without this guard the same
# failing request was re-sent to the model on every unrelated rerun. The
# stored error is shown again instead; an explicit click on the send button,
# or a change of question or model, triggers a real retry.
previous_failure = st.session_state.get("last_failure")
repeat_of_failure = (
    previous_failure is not None
    and previous_failure["attempt"] == attempt
    and not send_clicked
)

if query:
    with st.chat_message("user"):
        st.write(query)

    if repeat_of_failure:
        answer, error = None, previous_failure["error"]
    else:
        with st.spinner("🤖 Thinking..."):
            answer, error = query_agent_from_csv(
                query,
                st.session_state.dataset_summary,
                st.session_state.chat_history,
                model_repo,
            )

    if error:
        # Remember what failed so the next rerun does not repeat the call.
        st.session_state["last_failure"] = {"attempt": attempt, "error": error}
        with st.chat_message("assistant"):
            st.error(error)
    else:
        st.session_state["last_failure"] = None
        with st.chat_message("assistant"):
            st.write(answer)
        st.session_state.chat_history.append({"question": query, "answer": answer})
        # The text input already exists in this run, so request the clear
        # via a flag and rerun; the flag is consumed near the top of the
        # script, before the widget is created again.
        st.session_state.should_clear = True
        st.rerun()