File size: 4,138 Bytes
8f5e545
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import streamlit as st 
import os 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace


# Propagate the Hugging Face access token to the environment variable names
# the client libraries look for.  The token is read from ``HF_Token`` (the
# name this app was originally configured with), falling back to an already
# exported ``HF_TOKEN``.
#
# ``os.environ`` only accepts ``str`` values, so assigning the result of
# ``os.getenv`` directly raises ``TypeError`` when the variable is unset.
# Guarding on the value lets the script start without a token; the model call
# later reports the authentication problem instead of the app crashing here.
_hf_token = os.getenv("HF_Token") or os.getenv("HF_TOKEN")
if _hf_token:
    os.environ["HF_TOKEN"] = _hf_token
    # Kept for backward compatibility with anything reading the legacy name.
    os.environ["HUGGINGFACEHUB_API_KEY"] = _hf_token
    # NOTE(review): the LangChain Hugging Face integration documents
    # HUGGINGFACEHUB_API_TOKEN as its variable name - confirm against the
    # installed version.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# Page chrome.  set_page_config is kept as the first Streamlit call of the
# script.  The user-facing strings below had their emoji and dashes stored as
# mis-decoded UTF-8 (mojibake); they are restored to the intended characters.
st.set_page_config(page_title="InsightGenie – AI-Powered CSV Explorer", layout="wide")
st.title("🧙 InsightGenie")
st.markdown("**Explore your CSV like magic – Ask, analyze, and visualize with AI.**")

# (question, answer) pairs, stored in session state so they survive Streamlit
# reruns; created once per session.
if "qa_conversations" not in st.session_state:
    st.session_state.qa_conversations = []

# Falsy until the user has picked a file; only .csv uploads are accepted.
uploaded_csv = st.file_uploader("📂 Upload your CSV file to begin", type=["csv"])

# Main flow, active once a CSV has been uploaded:
#   1. parse the file and show a structural overview,
#   2. take a free-text question and send it, with a data preview, to the model,
#   3. optionally draw a simple line plot,
#   4. render the accumulated question/answer history.
if uploaded_csv:
    # Any failure while parsing or summarising stops this script run, so the
    # code below never executes with `data` undefined.
    try:
        data = pd.read_csv(uploaded_csv)
        st.success("✅ Data loaded successfully!")

        st.header("🔎 Dataset Overview")
        st.markdown(f"- **Rows and Columns:** {data.shape[0]} rows × {data.shape[1]} columns")
        st.markdown("**📌 Column Names:**")
        st.write(data.columns.tolist())

        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**🧩 Missing Values**")
            st.dataframe(data.isnull().sum(), height=200)
        with col2:
            st.markdown("**🔢 Data Types**")
            st.dataframe(data.dtypes, height=200)

    except Exception as e:
        st.error(f"❌ Failed to read the file: {e}")
        st.stop()

    st.header("💬 Ask InsightGenie")
    # Strip so a whitespace-only entry is treated as "no question" instead of
    # being sent to the model.
    user_question = st.text_input("Type your question about the dataset here:").strip()

    if user_question:
        # Only the first 50 rows are sent to the model as context.
        sample_data = data.head(50).to_csv(index=False)

        prompt = f"""
You are a skilled data assistant named InsightGenie. A user has uploaded a dataset and asked a question.
Answer clearly. If the question involves charts or graphs, provide appropriate Python code using matplotlib or seaborn.

Here’s a preview of the dataset:
{sample_data}

User question:
{user_question}
"""

        with st.spinner("🔍 Generating response..."):
            try:
                # The clients are built only when there is a question to
                # answer, and inside the try so that a configuration problem
                # (e.g. a missing token) is reported through st.error instead
                # of surfacing as an unhandled traceback.
                genie_endpoint = HuggingFaceEndpoint(
                    repo_id="deepseek-ai/DeepSeek-R1",
                    provider="nebius",
                    temperature=0.5,
                    max_new_tokens=150,
                    task="conversational"
                )

                genie_chatbot = ChatHuggingFace(
                    llm=genie_endpoint,
                    repo_id=genie_endpoint.repo_id,
                    provider=genie_endpoint.provider,
                    temperature=0.5,
                    max_new_tokens=150,
                    task="conversational"
                )

                model_response = genie_chatbot.invoke([{"role": "user", "content": prompt}])
                # Fall back to the raw return value if it has no `.content`.
                bot_reply = model_response.content if hasattr(model_response, "content") else model_response

                st.session_state.qa_conversations.append((user_question, bot_reply))

                st.markdown("### 🧠 Genie Says")
                st.write(bot_reply)

                # Auto-plot for simple queries: when the question mentions
                # "plot", draw the second numeric column against the first.
                if "plot" in user_question.lower():
                    with st.expander("📊 Auto-generated Plot"):
                        try:
                            numeric_cols = data.select_dtypes(include='number').columns.tolist()
                            if len(numeric_cols) >= 2:
                                fig, ax = plt.subplots()
                                try:
                                    sns.lineplot(data=data, x=numeric_cols[0], y=numeric_cols[1], ax=ax)
                                    ax.set_title(f"{numeric_cols[1]} vs {numeric_cols[0]}")
                                    st.pyplot(fig)
                                finally:
                                    # pyplot keeps every figure registered
                                    # until it is closed; release it so
                                    # figures do not pile up across reruns.
                                    plt.close(fig)
                            else:
                                st.info("⚠️ Not enough numeric columns to generate a plot.")
                        except Exception as e:
                            st.error(f"❌ Plotting error: {e}")
            except Exception as e:
                st.error(f"❌ Error generating AI response: {e}")

    # Newest exchange first.
    if st.session_state.qa_conversations:
        st.header("📚 Chat History")
        for user_q, ai_a in reversed(st.session_state.qa_conversations):
            st.markdown(f"**🧑‍💻 You:** {user_q}")
            st.markdown(f"**🤖 InsightGenie:** {ai_a}")
            st.markdown("---")