"""CSV Illuminator: a Streamlit app for exploring an uploaded CSV with an LLM.

The script renders a dataset overview, forwards free-text questions about the
data to a Hugging Face hosted chat model, draws a simple line plot when the
question mentions "plot", and keeps a per-session history of questions and
answers.
"""

import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Configure page (must run before any other Streamlit command).
st.set_page_config(page_title="CSV Illuminator", layout="wide")
st.title("📊 CSV Illuminator")
st.markdown("_Shedding light on hidden patterns in data with AI._")

# Set API token (the deployment is expected to provide it in the "hf"
# environment variable). os.environ only accepts strings, so the token is
# exported only when it is actually present.
HF_TOKEN = os.getenv("hf")
if HF_TOKEN:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN
elif not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
    st.warning(
        "Environment variable 'hf' is not set; "
        "requests to the language model may fail."
    )

# Chat memory: list of (question, answer) tuples kept for the session.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []


# Model setup
@st.cache_resource
def load_model():
    """Build the chat model wrapper; cached by Streamlit across reruns."""
    endpoint = HuggingFaceEndpoint(
        repo_id="deepseek-ai/DeepSeek-R1",
        provider="nebius",
        temperature=0.5,
        max_new_tokens=200,
        task="conversational",
    )
    return ChatHuggingFace(llm=endpoint)


model = load_model()


def _build_prompt(df: pd.DataFrame, question: str) -> str:
    """Return the LLM prompt: instructions, a 50-row CSV sample, the question."""
    sample = df.head(50).to_csv(index=False)
    return f"""
You are a professional data analyst. The following is a sample of a dataset and a user question.
Answer clearly in plain English. If plotting is needed, return valid Python code using matplotlib or seaborn.

Dataset Preview:
{sample}

User Question:
{question}
"""


def _render_overview(df: pd.DataFrame) -> None:
    """Show shape, column names, missing-value counts and dtypes of *df*."""
    with st.expander("📋 Dataset Overview", expanded=True):
        st.write("**Shape:**", df.shape)
        st.write("**Columns:**", df.columns.tolist())
        st.write("**Missing Values:**")
        st.dataframe(df.isnull().sum())
        st.write("**Data Types:**")
        st.dataframe(df.dtypes)


def _answer_question(df: pd.DataFrame, question: str) -> None:
    """Send *question* plus a data sample to the model and display the reply.

    Successful answers are appended to ``st.session_state.chat_history``.
    Any model failure is reported in the UI instead of aborting the script.
    """
    prompt = _build_prompt(df, question)
    with st.spinner("Analyzing your data..."):
        try:
            response = model.invoke([{"role": "user", "content": prompt}])
            # Fall back to the raw response if it has no ``content`` attribute.
            result = response.content if hasattr(response, "content") else response
            st.session_state.chat_history.append((question, result))
            st.markdown("### 🧠 Answer")
            st.markdown(result)
        except Exception as exc:  # UI boundary: surface the error to the user
            st.error(f"Model error: {exc}")


def _render_auto_plot(df: pd.DataFrame) -> None:
    """Draw a line plot of the first two numeric columns of *df*.

    Warns when fewer than two numeric columns exist. The figure is always
    closed afterwards so figures do not pile up in pyplot's global registry
    across Streamlit reruns.
    """
    st.subheader("📈 Auto-Generated Plot")
    fig = None
    try:
        numeric_cols = df.select_dtypes(include="number").columns.tolist()
        if len(numeric_cols) >= 2:
            fig, ax = plt.subplots()
            sns.lineplot(data=df, x=numeric_cols[0], y=numeric_cols[1], ax=ax)
            st.pyplot(fig)
        else:
            st.warning("Not enough numeric columns found for plotting.")
    except Exception as exc:  # UI boundary: surface the error to the user
        st.error(f"Plotting error: {exc}")
    finally:
        if fig is not None:
            plt.close(fig)


def _render_history() -> None:
    """List every question/answer pair recorded in this session."""
    if not st.session_state.chat_history:
        return
    with st.expander("📚 Previous Interactions"):
        for question, answer in st.session_state.chat_history:
            st.markdown(f"**🧍 You:** {question}")
            st.markdown(f"**🤖 Bot:** {answer}")


# File uploader
st.sidebar.header("📁 Upload Your CSV File")
uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type=["csv"])

if uploaded_file is None:
    st.info("👈 Upload a CSV file to get started.")
else:
    # Only the parse step is guarded here, so the "Error reading CSV" message
    # is shown exclusively for genuine read/parse failures.
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as exc:  # UI boundary: surface the error to the user
        st.error(f"Error reading CSV: {exc}")
    else:
        st.success("✅ File loaded successfully!")

        # Dataset overview
        _render_overview(df)

        # AI-powered Q&A
        st.subheader("💬 Ask a Question About Your Data")
        user_question = st.text_input(
            "Type your question here (e.g. 'What’s the average price?', 'Plot revenue by month')"
        )

        if user_question:
            _answer_question(df, user_question)

        # Auto-plotting if user asks for a chart
        if user_question and "plot" in user_question.lower():
            _render_auto_plot(df)

        # Chat history
        _render_history()