# File size: 3,436 Bytes
# 7569eb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

# Set API token
# The token is read from the "hf" environment variable. os.environ only
# accepts str values, so assigning a missing token (None) would raise
# TypeError before the app even renders; export it only when it is set.
_hf_token = os.getenv("hf")
if _hf_token:
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = _hf_token
    os.environ['HF_TOKEN'] = _hf_token

# Page header. The emoji and the en dash are written as real Unicode
# characters; they were previously stored as mis-decoded UTF-8 bytes and
# rendered as garbage in the UI.
st.title("📊 DataCraft CSV")
st.subheader("– Crafting insights from structured data")

# Session state for chat history
# Create the list only if it is absent so that (question, answer) pairs
# already stored in the session are not overwritten.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Upload CSV
# Only files with a .csv extension are accepted; the result is tested for
# truthiness before any analysis is attempted.
uploaded_file = st.file_uploader("Upload CSV", type=["csv"])

def _plot_first_numeric_pair(frame):
    """Render a line plot of the first two numeric columns of *frame*.

    Shows an info message instead when fewer than two numeric columns exist.
    The figure is always closed after rendering so that figures do not pile
    up in pyplot's global registry across repeated script runs.
    """
    numeric_cols = frame.select_dtypes(include='number').columns.tolist()
    if len(numeric_cols) < 2:
        st.info("Could not find enough numeric columns to plot.")
        return

    fig, ax = plt.subplots()
    try:
        sns.lineplot(data=frame, x=numeric_cols[0], y=numeric_cols[1], ax=ax)
        st.pyplot(fig)
    finally:
        # pyplot keeps a reference to every open figure until it is closed.
        plt.close(fig)


if uploaded_file:
    # Parse the upload. An empty or malformed file is reported in the UI
    # instead of crashing the app with a raw traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"Could not read the CSV file: {e}")
        st.stop()

    st.success("✅ File loaded successfully!")
    st.subheader("🔍 Quick Summary")

    st.write("**Shape:**", df.shape)
    st.write("**Columns:**", df.columns.tolist())
    st.write("**Missing Values:**")
    st.dataframe(df.isnull().sum())
    st.write("**Data Types:**")
    # Show dtypes as plain strings; raw numpy dtype objects are not reliably
    # serializable by the dataframe widget on every Streamlit/pyarrow version.
    st.dataframe(df.dtypes.astype(str))

    st.subheader("💬 Ask a question about the dataset")

    user_input = st.text_input("E.g. 'What are the average values?', 'Plot sales over time'")

    # Ignore empty and whitespace-only questions.
    if user_input and user_input.strip():
        # Only the first 50 rows are sent to the model to keep the prompt small.
        df_sample = df.head(50).to_csv(index=False)

        prompt = f"""
You are a helpful data analyst. Here's a preview of the dataset and a user question. Provide an answer in plain English. If the question mentions plotting, include the code as well.
Dataset:
{df_sample}
User question: {user_input}
"""

        with st.spinner("Thinking..."):
            try:
                # Hugging Face Model Setup
                # Built only when there is a question to answer, and inside
                # the error boundary so that configuration problems (e.g. a
                # missing token) are shown as an error message.
                deepseek = HuggingFaceEndpoint(
                    repo_id="deepseek-ai/DeepSeek-R1",
                    provider="nebius",
                    temperature=0.5,
                    max_new_tokens=150,
                    task="conversational"
                )

                model = ChatHuggingFace(
                    llm=deepseek,
                    repo_id=deepseek.repo_id,
                    provider=deepseek.provider,
                    temperature=0.5,
                    max_new_tokens=150,
                    task="conversational"
                )

                response = model.invoke([{"role": "user", "content": prompt}])
                # Fall back to the raw response when it has no .content attribute.
                result = response.content if hasattr(response, "content") else response
                st.session_state.chat_history.append((user_input, result))

                st.markdown("### 🧠 Answer")
                st.write(result)

                # Optional: Execute simple plot command if mentioned
                if "plot" in user_input.lower():
                    with st.expander("📈 Try plotting automatically"):
                        try:
                            _plot_first_numeric_pair(df)
                        except Exception as e:
                            st.error(f"Plotting failed: {e}")

            except Exception as e:
                # Top-level boundary: surface any model/network failure in the UI.
                st.error(f"Error: {e}")

    # Display previous chat history
    if st.session_state.chat_history:
        st.subheader("📚 Previous Q&A")
        for q, a in st.session_state.chat_history:
            st.markdown(f"**You:** {q}")
            st.markdown(f"**Bot:** {a}")