Spaces:

UmaKumpatla
/

CSV_Scout

Sleeping

App Files Files Community

CSV_Scout / app.py

UmaKumpatla

Update app.py

e684ac9 verified 9 months ago

raw

history blame contribute delete

3.77 kB

	import os
	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

	# Page chrome: browser-tab title and wide layout first, then the visible
	# headline and tagline. set_page_config stays the first Streamlit call in
	# the script.
	st.set_page_config(
	    layout="wide",
	    page_title="CSV Illuminator",
	)
	st.title(body="📊 CSV Illuminator")
	st.markdown(body="_Shedding light on hidden patterns in data with AI._")

	# Hugging Face credentials.
	# The token is read from the "hf" environment variable (set it as a secret
	# in the deployment) and mirrored into HUGGINGFACEHUB_API_TOKEN.
	HF_TOKEN = os.getenv("hf")
	if HF_TOKEN is not None:
	    # os.environ only accepts str values: assigning the None returned for an
	    # unset variable raises TypeError and would abort the app at startup, so
	    # the copy is made only when a token is actually present.
	    os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN

	# Conversation log kept in Streamlit's session state: created as an empty
	# list the first time the key is missing, left untouched afterwards.
	if "chat_history" not in st.session_state:
	    st.session_state["chat_history"] = []

	# Model setup
	@st.cache_resource
	def load_model(
	    repo_id: str = "deepseek-ai/DeepSeek-R1",
	    provider: str = "nebius",
	    temperature: float = 0.5,
	    max_new_tokens: int = 200,
	) -> ChatHuggingFace:
	    """Build the chat model used to answer questions about the dataset.

	    All parameters are forwarded to ``HuggingFaceEndpoint``; their defaults
	    reproduce the configuration that used to be hard-coded, so calling
	    ``load_model()`` with no arguments behaves as before.

	    Args:
	        repo_id: Hugging Face model repository to query.
	        provider: Inference provider passed to the endpoint.
	        temperature: Sampling temperature passed to the endpoint.
	        max_new_tokens: Upper bound on generated tokens per reply.

	    Returns:
	        A ``ChatHuggingFace`` wrapper around the configured endpoint.

	    Note:
	        The function is decorated with ``st.cache_resource``, which is
	        expected to reuse the returned object across script reruns for
	        identical arguments instead of rebuilding it each time.
	    """
	    endpoint = HuggingFaceEndpoint(
	        repo_id=repo_id,
	        provider=provider,
	        temperature=temperature,
	        max_new_tokens=max_new_tokens,
	        task="conversational",
	    )
	    return ChatHuggingFace(llm=endpoint)


	# Module-level handle used by the question-answering section below.
	model = load_model()

	# Sidebar upload widget. `uploaded_file` is whatever the uploader returns
	# for the current run and drives the rest of the script.
	with st.sidebar:
	    st.header("📁 Upload Your CSV File")
	    uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])

	def _show_overview(df: pd.DataFrame) -> None:
	    """Render shape, column names, missing-value counts and dtypes."""
	    with st.expander("📋 Dataset Overview", expanded=True):
	        st.write("Shape:", df.shape)
	        st.write("Columns:", df.columns.tolist())
	        st.write("Missing Values:")
	        st.dataframe(df.isnull().sum())
	        st.write("Data Types:")
	        st.dataframe(df.dtypes)


	def _build_prompt(df: pd.DataFrame, question: str) -> str:
	    """Return the model prompt: instructions, a CSV preview and the question.

	    Only the first 50 rows are serialized into the prompt.
	    """
	    sample = df.head(50).to_csv(index=False)
	    return (
	        "\n"
	        "You are a professional data analyst. The following is a sample of a dataset and a user question.\n"
	        "Answer clearly in plain English. If plotting is needed, return valid Python code using matplotlib or seaborn.\n"
	        "Dataset Preview:\n"
	        f"{sample}\n"
	        f"User Question: {question}\n"
	    )


	def _answer_question(df: pd.DataFrame, question: str) -> None:
	    """Send the question to the model, show the answer and log the exchange.

	    A failing model call is reported with ``st.error``; nothing is added to
	    the chat history in that case.
	    """
	    prompt = _build_prompt(df, question)
	    # The spinner wraps only the slow remote call.
	    with st.spinner("Analyzing your data..."):
	        try:
	            response = model.invoke([{"role": "user", "content": prompt}])
	        except Exception as e:
	            st.error(f"Model error: {e}")
	            return

	    # Fall back to the raw response when it has no `.content` attribute.
	    result = getattr(response, "content", response)
	    st.session_state.chat_history.append((question, result))
	    st.markdown("### 🧠 Answer")
	    st.markdown(result)


	def _auto_plot(df: pd.DataFrame) -> None:
	    """Draw a line plot of the first two numeric columns, if there are two."""
	    st.subheader("📈 Auto-Generated Plot")
	    fig = None
	    try:
	        numeric_cols = df.select_dtypes(include="number").columns.tolist()
	        if len(numeric_cols) >= 2:
	            fig, ax = plt.subplots()
	            sns.lineplot(data=df, x=numeric_cols[0], y=numeric_cols[1], ax=ax)
	            st.pyplot(fig)
	        else:
	            st.warning("Not enough numeric columns found for plotting.")
	    except Exception as e:
	        st.error(f"Plotting error: {e}")
	    finally:
	        # Release the figure once it has been handed to Streamlit (or the
	        # plot failed); otherwise pyplot keeps every figure alive and they
	        # pile up as the script is re-executed.
	        if fig is not None:
	            plt.close(fig)


	def _show_history() -> None:
	    """List earlier question/answer pairs stored in the session state."""
	    history = st.session_state.chat_history
	    if not history:
	        return
	    with st.expander("📚 Previous Interactions"):
	        for q, a in history:
	            st.markdown(f"🧍 You: {q}")
	            st.markdown(f"🤖 Bot: {a}")


	def _render_analysis(df: pd.DataFrame) -> None:
	    """Render everything shown once a CSV has been parsed successfully."""
	    st.success("✅ File loaded successfully!")
	    _show_overview(df)

	    # AI-powered Q&A
	    st.subheader("💬 Ask a Question About Your Data")
	    user_question = st.text_input("Type your question here (e.g. 'What’s the average price?', 'Plot revenue by month')")

	    if user_question:
	        _answer_question(df, user_question)
	        # Auto-plotting if the user asks for a chart
	        if "plot" in user_question.lower():
	            _auto_plot(df)

	    _show_history()


	if uploaded_file is None:
	    st.info("👈 Upload a CSV file to get started.")
	else:
	    # Only the parse itself is reported as a CSV error; failures further
	    # down get their own message instead of being mislabelled.
	    try:
	        df = pd.read_csv(uploaded_file)
	    except Exception as e:
	        st.error(f"Error reading CSV: {e}")
	    else:
	        try:
	            _render_analysis(df)
	        except Exception as e:
	            # UI boundary: keep the page alive and say what actually failed.
	            st.error(f"Unexpected error while analyzing the data: {e}")