File size: 3,772 Bytes
2120344
 
 
 
 
 
 
 
e684ac9
 
 
2120344
 
 
 
d0dd204
2120344
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

# Configure page (wide layout, browser-tab title) and render the header.
st.set_page_config(page_title="CSV Illuminator", layout="wide")
# The title emoji had been stored as mojibake (UTF-8 bytes decoded with the
# wrong codec); the intended bar-chart character is restored here.
st.title("📊 CSV Illuminator")
st.markdown("_Shedding light on hidden patterns in data with AI._")

# Set API token. The deployment is expected to provide it in the "hf"
# environment variable.
HF_TOKEN = os.getenv("hf")
if not HF_TOKEN:
    # os.environ only accepts str values, so assigning a missing (None) token
    # would raise TypeError. Report the problem and halt this script run.
    st.error(
        "Hugging Face API token not found. "
        "Set the 'hf' environment variable and reload the app."
    )
    st.stop()
# Expose the token under HUGGINGFACEHUB_API_TOKEN as well. Re-assigning "hf"
# to its own value was redundant and has been dropped.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN

# Chat memory: start with an empty list of (question, answer) pairs the first
# time this session runs the script.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

# Model setup
@st.cache_resource
def load_model(
    repo_id: str = "deepseek-ai/DeepSeek-R1",
    provider: str = "nebius",
    temperature: float = 0.5,
    max_new_tokens: int = 200,
) -> ChatHuggingFace:
    """Build the chat model used to answer questions about the dataset.

    The function is wrapped in ``st.cache_resource``, so the constructed
    object is reused instead of being rebuilt for every call with the same
    arguments.

    Args:
        repo_id: Hugging Face model repository to query.
        provider: Inference provider passed to the endpoint.
        temperature: Sampling temperature passed to the endpoint.
        max_new_tokens: Upper bound on generated tokens per reply.

    Returns:
        A ``ChatHuggingFace`` wrapper around a conversational
        ``HuggingFaceEndpoint``.
    """
    endpoint = HuggingFaceEndpoint(
        repo_id=repo_id,
        provider=provider,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        task="conversational",
    )
    return ChatHuggingFace(llm=endpoint)

model = load_model()


def _load_csv(uploaded):
    """Parse the uploaded file into a DataFrame; on failure report it and return None."""
    # Only the parse itself is guarded, so the "Error reading CSV" message is
    # never shown for unrelated failures later in the page.
    try:
        frame = pd.read_csv(uploaded)
    except Exception as e:
        st.error(f"Error reading CSV: {e}")
        return None
    st.success("✅ File loaded successfully!")
    return frame


def _show_overview(frame):
    """Render shape, column names, missing-value counts and dtypes of the data."""
    with st.expander("📋 Dataset Overview", expanded=True):
        st.write("**Shape:**", frame.shape)
        st.write("**Columns:**", frame.columns.tolist())
        st.write("**Missing Values:**")
        st.dataframe(frame.isnull().sum())
        st.write("**Data Types:**")
        st.dataframe(frame.dtypes)


def _build_prompt(sample_csv, question):
    """Return the analyst prompt embedding the CSV preview and the user question."""
    return f"""
You are a professional data analyst. The following is a sample of a dataset and a user question. 
Answer clearly in plain English. If plotting is needed, return valid Python code using matplotlib or seaborn.
Dataset Preview:
{sample_csv}
User Question: {question}
"""


def _answer_question(chat_model, frame, question):
    """Ask the model about the first 50 rows, display the reply and record it."""
    prompt = _build_prompt(frame.head(50).to_csv(index=False), question)
    with st.spinner("Analyzing your data..."):
        try:
            response = chat_model.invoke([{"role": "user", "content": prompt}])
            result = response.content if hasattr(response, "content") else response

            st.session_state.chat_history.append((question, result))
            st.markdown("### 🧠 Answer")
            st.markdown(result)
        except Exception as e:
            st.error(f"Model error: {e}")


def _show_auto_plot(frame):
    """Draw a line plot of the first two numeric columns, or warn if there are fewer."""
    st.subheader("📈 Auto-Generated Plot")
    try:
        numeric_cols = frame.select_dtypes(include="number").columns.tolist()
        if len(numeric_cols) < 2:
            st.warning("Not enough numeric columns found for plotting.")
            return
        fig, ax = plt.subplots()
        try:
            sns.lineplot(data=frame, x=numeric_cols[0], y=numeric_cols[1], ax=ax)
            st.pyplot(fig)
        finally:
            # pyplot keeps every figure it creates until it is closed; without
            # this, each plotted rerun would leave another figure behind.
            plt.close(fig)
    except Exception as e:
        st.error(f"Plotting error: {e}")


def _show_history():
    """List every recorded (question, answer) pair inside a collapsed expander."""
    history = st.session_state.chat_history
    if not history:
        return
    with st.expander("📚 Previous Interactions"):
        for q, a in history:
            st.markdown(f"**🧍 You:** {q}")
            st.markdown(f"**🤖 Bot:** {a}")


# File uploader
# NOTE: the emoji and apostrophe in the labels below were stored as mojibake
# and have been restored to the intended characters.
st.sidebar.header("📁 Upload Your CSV File")
uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type=["csv"])

if uploaded_file:
    df = _load_csv(uploaded_file)
    if df is not None:
        # Dataset overview
        _show_overview(df)

        # AI-powered Q&A
        st.subheader("💬 Ask a Question About Your Data")
        raw_question = st.text_input(
            "Type your question here (e.g. 'What’s the average price?', 'Plot revenue by month')"
        )
        # Ignore whitespace-only input so it does not trigger a model call.
        user_question = (raw_question or "").strip()

        if user_question:
            _answer_question(model, df, user_question)

            # Auto-plotting if user asks for a chart
            if "plot" in user_question.lower():
                _show_auto_plot(df)

        # Chat history
        _show_history()
else:
    st.info("👈 Upload a CSV file to get started.")