Spaces:
Sleeping
Sleeping
"""DataCraft CSV: a Streamlit app for asking an LLM questions about a CSV file.

The script lets the user upload a CSV, shows a quick summary of it (shape,
columns, missing values, dtypes), sends a preview of the data together with a
free-text question to a Hugging Face hosted chat model, and keeps the
question/answer pairs in ``st.session_state.chat_history``.
"""
import os
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from typing import Optional
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

# Number of leading rows of the dataset that are embedded in the prompt.
PREVIEW_ROWS = 50

# NOTE(review): the leading symbols in the UI strings below ("π", "β", ...)
# look like mis-decoded emoji. They are kept verbatim here -- confirm against
# the encoding of the original file before changing them.


def _export_hf_token() -> bool:
    """Copy the token from the ``hf`` environment variable to the HF variables.

    ``os.environ`` only accepts ``str`` values, so assigning the result of
    ``os.getenv("hf")`` directly raises ``TypeError`` when ``hf`` is unset.
    The assignment is therefore skipped in that case.

    Returns:
        True when a non-empty token was found and exported, False otherwise.
    """
    token = os.getenv("hf")
    if not token:
        return False
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = token
    os.environ['HF_TOKEN'] = token
    return True


def _load_csv(uploaded_file) -> Optional[pd.DataFrame]:
    """Parse the uploaded file as CSV.

    Returns:
        The parsed DataFrame, or None when the file is empty, malformed or
        not decodable (an error message is shown to the user in that case).
    """
    try:
        return pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError) as e:
        st.error(f"Could not read the CSV file: {e}")
        return None


def _build_chat_model() -> ChatHuggingFace:
    """Create the chat model wrapper around the hosted DeepSeek-R1 endpoint."""
    deepseek = HuggingFaceEndpoint(
        repo_id="deepseek-ai/DeepSeek-R1",
        provider="nebius",
        temperature=0.5,
        max_new_tokens=150,
        task="conversational",
    )
    return ChatHuggingFace(
        llm=deepseek,
        repo_id=deepseek.repo_id,
        provider=deepseek.provider,
        temperature=0.5,
        max_new_tokens=150,
        task="conversational",
    )


def _build_prompt(df: pd.DataFrame, question: str) -> str:
    """Return the prompt: instructions, a CSV preview of *df*, and *question*."""
    df_sample = df.head(PREVIEW_ROWS).to_csv(index=False)
    return (
        "\n"
        "You are a helpful data analyst. Here's a preview of the dataset "
        "and a user question. Provide an answer in plain English. "
        "If the question mentions plotting, include the code as well.\n"
        "Dataset:\n"
        f"{df_sample}\n"
        f"User question: {question}\n"
    )


def _render_auto_plot(df: pd.DataFrame) -> None:
    """Draw a line plot of the first two numeric columns inside an expander.

    Shows an info message when fewer than two numeric columns exist and an
    error message when plotting fails.
    """
    with st.expander("π Try plotting automatically"):
        fig = None
        try:
            # Simple heuristic: first numeric column on x, second on y.
            cols = df.select_dtypes(include='number').columns.tolist()
            if len(cols) >= 2:
                fig, ax = plt.subplots()
                sns.lineplot(data=df, x=cols[0], y=cols[1], ax=ax)
                st.pyplot(fig)
            else:
                st.info("Could not find enough numeric columns to plot.")
        except Exception as e:
            st.error(f"Plotting failed: {e}")
        finally:
            # pyplot keeps every figure registered until it is closed; without
            # this, figures accumulate across Streamlit reruns.
            if fig is not None:
                plt.close(fig)


def _render_summary(df: pd.DataFrame) -> None:
    """Show shape, column names, missing-value counts and dtypes of *df*."""
    st.subheader("π Quick Summary")
    st.write("**Shape:**", df.shape)
    st.write("**Columns:**", df.columns.tolist())
    st.write("**Missing Values:**")
    st.dataframe(df.isnull().sum())
    st.write("**Data Types:**")
    st.dataframe(df.dtypes)


def _answer_question(df: pd.DataFrame, user_input: str) -> None:
    """Send *user_input* plus a data preview to the model and show the answer.

    On success the (question, answer) pair is appended to the session chat
    history; on failure an error message is shown and nothing is recorded.
    """
    prompt = _build_prompt(df, user_input)
    with st.spinner("Thinking..."):
        try:
            # The model is built lazily, only when a question is asked, so
            # that setup errors are reported through the same handler.
            model = _build_chat_model()
            response = model.invoke([{"role": "user", "content": prompt}])
        except Exception as e:
            st.error(f"Error: {e}")
            return
    result = response.content if hasattr(response, "content") else response
    st.session_state.chat_history.append((user_input, result))
    st.markdown("### π§ Answer")
    st.write(result)
    # Optional: draw a simple automatic plot when the question mentions one.
    if "plot" in user_input.lower():
        _render_auto_plot(df)


# Set API token
token_available = _export_hf_token()

st.title("π DataCraft CSV")
st.subheader("β Crafting insights from structured data")
if not token_available:
    st.warning(
        "Hugging Face token not found: set the 'hf' environment variable "
        "to enable question answering."
    )

# Session state for chat history
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Upload CSV
uploaded_file = st.file_uploader("Upload CSV", type=["csv"])
df = _load_csv(uploaded_file) if uploaded_file else None
if df is not None:
    st.success("β File loaded successfully!")
    _render_summary(df)
    st.subheader("π¬ Ask a question about the dataset")
    user_input = st.text_input("E.g. 'What are the average values?', 'Plot sales over time'")
    if user_input:
        _answer_question(df, user_input)

# Display previous chat history
if st.session_state.chat_history:
    st.subheader("π Previous Q&A")
    for q, a in st.session_state.chat_history:
        st.markdown(f"**You:** {q}")
        st.markdown(f"**Bot:** {a}")