"""DataCraft CSV: a Streamlit app for asking an LLM questions about a CSV file.

The script lets the user upload a CSV, shows a quick structural summary
(shape, columns, missing values, dtypes), sends a preview of the data plus
the user's question to a Hugging Face hosted chat model, and keeps the
question/answer pairs in the Streamlit session state.
"""

import os
from io import StringIO  # noqa: F401  (kept: may be used elsewhere / by callers)
from typing import Optional

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Name of the environment variable that carries the Hugging Face token.
TOKEN_SOURCE_VAR = "hf"
# Environment variables the token is copied into.
HF_TOKEN_VARS = ("HUGGINGFACEHUB_API_TOKEN", "HF_TOKEN")

# Model / generation settings.
MODEL_REPO_ID = "deepseek-ai/DeepSeek-R1"
MODEL_PROVIDER = "nebius"
MODEL_TEMPERATURE = 0.5
MODEL_MAX_NEW_TOKENS = 150
MODEL_TASK = "conversational"

# Number of leading rows of the dataset that are included in the prompt.
PREVIEW_ROWS = 50


def _configure_hf_token() -> bool:
    """Copy the token from the ``hf`` environment variable into the HF variables.

    ``os.environ`` only accepts string values, so the assignment is skipped
    when ``hf`` is not set instead of raising ``TypeError``.

    Returns:
        True if a token is available (either copied from ``hf`` or already
        present in one of the target variables), False otherwise.
    """
    token = os.getenv(TOKEN_SOURCE_VAR)
    if token:
        for name in HF_TOKEN_VARS:
            os.environ[name] = token
        return True
    return any(os.getenv(name) for name in HF_TOKEN_VARS)


@st.cache_resource(show_spinner=False)
def _load_chat_model() -> ChatHuggingFace:
    """Build the chat model once and reuse it across Streamlit reruns."""
    deepseek = HuggingFaceEndpoint(
        repo_id=MODEL_REPO_ID,
        provider=MODEL_PROVIDER,
        temperature=MODEL_TEMPERATURE,
        max_new_tokens=MODEL_MAX_NEW_TOKENS,
        task=MODEL_TASK,
    )
    return ChatHuggingFace(
        llm=deepseek,
        repo_id=deepseek.repo_id,
        provider=deepseek.provider,
        temperature=MODEL_TEMPERATURE,
        max_new_tokens=MODEL_MAX_NEW_TOKENS,
        task=MODEL_TASK,
    )


def _read_csv(uploaded_file) -> Optional[pd.DataFrame]:
    """Parse the uploaded file as CSV.

    Args:
        uploaded_file: The file-like object returned by ``st.file_uploader``.

    Returns:
        The parsed DataFrame, or None if the file could not be parsed (an
        error message is shown to the user in that case).
    """
    try:
        return pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return None


def _render_summary(df: pd.DataFrame) -> None:
    """Show shape, column names, missing-value counts and dtypes of *df*."""
    st.subheader("🔍 Quick Summary")
    st.write("**Shape:**", df.shape)
    st.write("**Columns:**", df.columns.tolist())
    st.write("**Missing Values:**")
    st.dataframe(df.isnull().sum())
    st.write("**Data Types:**")
    # Show dtypes by name; a Series of dtype objects is converted to plain
    # strings before it is handed to the table widget.
    st.dataframe(df.dtypes.astype(str))


def _build_prompt(df: pd.DataFrame, question: str, max_rows: int = PREVIEW_ROWS) -> str:
    """Return the prompt: instructions, a CSV preview of *df*, and the question.

    Only the first *max_rows* rows are included to keep the prompt small.
    """
    preview_csv = df.head(max_rows).to_csv(index=False)
    return (
        "You are a helpful data analyst. "
        "Here's a preview of the dataset and a user question.\n"
        "Provide an answer in plain English. "
        "If the question mentions plotting, include the code as well.\n"
        "\n"
        f"Dataset:\n{preview_csv}\n"
        f"User question: {question}\n"
    )


def _render_auto_plot(df: pd.DataFrame) -> None:
    """Draw a line plot of the first two numeric columns of *df*, if present."""
    # UI boundary: any plotting failure is reported to the user, not raised.
    try:
        numeric_cols = df.select_dtypes(include="number").columns.tolist()
        if len(numeric_cols) < 2:
            st.info("Could not find enough numeric columns to plot.")
            return
        fig, ax = plt.subplots()
        try:
            sns.lineplot(data=df, x=numeric_cols[0], y=numeric_cols[1], ax=ax)
            st.pyplot(fig)
        finally:
            # pyplot keeps every figure alive until closed; the script reruns
            # on each interaction, so close explicitly to avoid accumulation.
            plt.close(fig)
    except Exception as exc:
        st.error(f"Plotting failed: {exc}")


def _answer_question(df: pd.DataFrame, question: str) -> None:
    """Send *question* about *df* to the model, show and record the answer."""
    prompt = _build_prompt(df, question)
    with st.spinner("Thinking..."):
        # UI boundary: model/network errors are shown to the user, not raised.
        try:
            model = _load_chat_model()
            response = model.invoke([{"role": "user", "content": prompt}])
        except Exception as exc:
            st.error(f"Error: {exc}")
            return

    result = getattr(response, "content", response)
    st.session_state.chat_history.append((question, result))
    st.markdown("### 🧠 Answer")
    st.write(result)

    # Optional: draw a simple plot when the question mentions plotting.
    if "plot" in question.lower():
        with st.expander("📈 Try plotting automatically"):
            _render_auto_plot(df)


def _render_history() -> None:
    """List every question/answer pair stored in the session state."""
    history = st.session_state.chat_history
    if not history:
        return
    st.subheader("📚 Previous Q&A")
    for question, answer in history:
        st.markdown(f"**You:** {question}")
        st.markdown(f"**Bot:** {answer}")


def main() -> None:
    """Render the page: upload, summary, question box, answer and history."""
    token_available = _configure_hf_token()

    st.title("📊 DataCraft CSV")
    st.subheader("– Crafting insights from structured data")

    if not token_available:
        st.warning(
            "No Hugging Face token found. Set the 'hf' environment variable "
            "to enable model answers."
        )

    # Session state for chat history
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    # Upload CSV
    uploaded_file = st.file_uploader("Upload CSV", type=["csv"])

    if uploaded_file:
        df = _read_csv(uploaded_file)
        if df is not None:
            st.success("✅ File loaded successfully!")
            _render_summary(df)

            st.subheader("💬 Ask a question about the dataset")
            user_input = st.text_input(
                "E.g. 'What are the average values?', 'Plot sales over time'"
            )
            # Ignore blank / whitespace-only questions.
            if user_input and user_input.strip():
                _answer_question(df, user_input)

    # Display previous chat history
    _render_history()


# Called unconditionally so the page renders however the script is executed,
# matching the original module-level behaviour.
main()