# CSV_Scout / app.py
# UmaKumpatla's picture
# Update app.py
# e684ac9 verified
import os
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
# Configure page. set_page_config is kept as the very first Streamlit call.
st.set_page_config(page_title="CSV Illuminator", layout="wide")
st.title("📊 CSV Illuminator")
st.markdown("_Shedding light on hidden patterns in data with AI._")

# Set API token. The deployment must define the `hf` environment variable.
# os.environ only accepts str values, so a missing variable is reported to the
# user here instead of raising TypeError on the assignment below.
HF_TOKEN = os.getenv("hf")
if not HF_TOKEN:
    st.error(
        "Hugging Face API token not found. "
        "Set the `hf` environment variable in the deployment and reload the app."
    )
    st.stop()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN

# Chat memory: list of (question, answer) tuples kept in the session state.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
# Model setup
@st.cache_resource
def load_model():
    """Build the chat model used to answer questions about the uploaded data.

    Configures a ``HuggingFaceEndpoint`` for the ``deepseek-ai/DeepSeek-R1``
    repository through the ``nebius`` provider and wraps it in
    ``ChatHuggingFace``. The function is decorated with ``st.cache_resource``,
    which lets Streamlit reuse the returned object instead of rebuilding it.

    Returns:
        The ``ChatHuggingFace`` wrapper around the configured endpoint.
    """
    endpoint_settings = {
        "repo_id": "deepseek-ai/DeepSeek-R1",
        "provider": "nebius",
        "temperature": 0.5,
        "max_new_tokens": 200,
        "task": "conversational",
    }
    llm = HuggingFaceEndpoint(**endpoint_settings)
    chat_model = ChatHuggingFace(llm=llm)
    return chat_model


model = load_model()
# --- Private helpers for the upload / analysis flow -------------------------


def _build_prompt(sample, question):
    """Return the analyst prompt embedding a CSV preview and the user question.

    Args:
        sample: CSV text of the dataset preview.
        question: The question typed by the user.

    Returns:
        The prompt string sent to the chat model.
    """
    # Assembled from explicit pieces so the prompt text does not depend on how
    # the surrounding code happens to be indented.
    return (
        "\n"
        "You are a professional data analyst. The following is a sample of a dataset and a user question.\n"
        "Answer clearly in plain English. If plotting is needed, return valid Python code using matplotlib or seaborn.\n"
        "Dataset Preview:\n"
        f"{sample}\n"
        f"User Question: {question}\n"
    )


def _render_overview(df):
    """Show shape, column names, missing-value counts and dtypes of *df*."""
    with st.expander("📋 Dataset Overview", expanded=True):
        st.write("**Shape:**", df.shape)
        st.write("**Columns:**", df.columns.tolist())
        st.write("**Missing Values:**")
        st.dataframe(df.isnull().sum())
        st.write("**Data Types:**")
        # Render dtype names as plain strings so the table does not rely on
        # the frontend being able to serialize dtype objects.
        st.dataframe(df.dtypes.astype(str))


def _answer_question(df, question):
    """Send the first 50 rows of *df* plus *question* to the model and show the reply.

    On success the (question, answer) pair is appended to
    ``st.session_state.chat_history``; on failure a "Model error" message is
    displayed and the history is left unchanged.
    """
    # NOTE(review): this runs each time the script executes with a non-empty
    # question, so a rerun with the same input presumably queries the model
    # and appends to the history again - confirm whether that is intended.
    prompt = _build_prompt(df.head(50).to_csv(index=False), question)
    with st.spinner("Analyzing your data..."):
        try:
            response = model.invoke([{"role": "user", "content": prompt}])
        except Exception as e:
            st.error(f"Model error: {e}")
        else:
            # Fall back to the raw response when it has no `.content`.
            result = getattr(response, "content", response)
            st.session_state.chat_history.append((question, result))
            st.markdown("### 🧠 Answer")
            st.markdown(result)


def _render_auto_plot(df):
    """Draw a line plot of the second numeric column against the first one.

    Shows a warning when *df* has fewer than two numeric columns and a
    "Plotting error" message when anything in the plotting path raises.
    """
    st.subheader("📈 Auto-Generated Plot")
    try:
        numeric_cols = df.select_dtypes(include="number").columns.tolist()
        if len(numeric_cols) >= 2:
            fig, ax = plt.subplots()
            try:
                sns.lineplot(data=df, x=numeric_cols[0], y=numeric_cols[1], ax=ax)
                st.pyplot(fig)
            finally:
                # Release the figure; otherwise pyplot keeps every figure
                # created here alive for the lifetime of the process.
                plt.close(fig)
        else:
            st.warning("Not enough numeric columns found for plotting.")
    except Exception as e:
        st.error(f"Plotting error: {e}")


def _render_history():
    """List the question/answer pairs stored in the session's chat history."""
    if st.session_state.chat_history:
        with st.expander("📚 Previous Interactions"):
            for q, a in st.session_state.chat_history:
                st.markdown(f"**🧍 You:** {q}")
                st.markdown(f"**🤖 Bot:** {a}")


# --- File uploader -----------------------------------------------------------
st.sidebar.header("📁 Upload Your CSV File")
uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type=["csv"])

if uploaded_file:
    # Only the parse step is reported as a CSV read error; later failures get
    # their own message so they are not mislabelled.
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Error reading CSV: {e}")
    else:
        st.success("✅ File loaded successfully!")
        try:
            # Dataset overview
            _render_overview(df)

            # AI-powered Q&A
            st.subheader("💬 Ask a Question About Your Data")
            user_question = st.text_input(
                "Type your question here (e.g. 'What’s the average price?', 'Plot revenue by month')"
            )
            if user_question:
                _answer_question(df, user_question)
                # Auto-plotting if user asks for a chart
                if "plot" in user_question.lower():
                    _render_auto_plot(df)

            # Chat history
            _render_history()
        except Exception as e:
            st.error(f"Unexpected error while analyzing the data: {e}")
else:
    st.info("👈 Upload a CSV file to get started.")