File size: 6,366 Bytes
b9123fb
 
 
 
 
e83d0d8
 
b9123fb
 
b3567b6
b9123fb
 
 
8bdb5a8
b9123fb
 
 
85c6abe
9e6e1b6
b9123fb
 
e83d0d8
b9123fb
 
 
 
 
 
 
6720592
b9123fb
 
6720592
b9123fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e83d0d8
b9123fb
 
 
 
 
 
 
 
 
ceb467e
b9123fb
 
19661f8
b9123fb
 
5cf8a18
237c686
c1da72d
b9123fb
c1da72d
 
b9123fb
 
 
 
 
 
e83d0d8
b9123fb
 
c1da72d
 
b9123fb
 
 
 
 
 
e83d0d8
b9123fb
 
 
 
 
 
 
 
e83d0d8
b9123fb
3d4ecb5
b9123fb
 
ceb467e
b9123fb
 
e83d0d8
ceb467e
e83d0d8
 
 
b9123fb
e83d0d8
 
39db6c7
b3567b6
e83d0d8
b9123fb
 
39db6c7
 
b9123fb
 
 
 
 
 
 
 
 
 
 
 
b3567b6
b9123fb
 
 
 
b3567b6
b9123fb
 
e83d0d8
ceb467e
 
19661f8
e83d0d8
 
 
 
 
 
 
 
 
b9123fb
 
 
 
 
 
 
 
 
 
 
 
 
 
64f4108
6720592
 
 
b3567b6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import os
import subprocess
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from groq import Groq

# Groq API key setup.
# SECURITY: a key used to be hard-coded on this line. Any secret that has been
# committed to source control must be treated as compromised: revoke it with
# the provider and issue a new one. The key is now read from the
# GROQ_API_KEY environment variable so it never lives in the repository.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail early with a visible message instead of erroring on the first call.
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)

# Groq Chat Function.
def chat_with_groq(prompt: str) -> str:
    """Send ``prompt`` to the Groq chat model and return the reply text.

    A fixed system message instructs the model to do the processing itself
    rather than generate code. Any exception raised while calling the API is
    caught and turned into an ``"Error fetching response: ..."`` string, so
    the caller always receives text it can display.

    Args:
        prompt: The full user message, including any data the model needs.

    Returns:
        The content of the first completion choice, or an error string.
    """
    messages = [
        {"role": "system", "content": "[INSTRUCTIONS DO NOT GENERATE CODE BUT DO THE PROCCESING YOURSELF]"},
        {"role": "user", "content": prompt},
    ]
    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model="llama3-8b-8192",
            stream=False,
        )
        # The former debug print(prompt) was dropped: the prompt embeds the
        # uploaded dataset, which should not be echoed to server stdout.
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort: surface the failure as displayable text.
        return f"Error fetching response: {e}"

def generate_code_with_groq(prompt):
    """Request Python code for ``prompt`` from the Groq code model.

    The conversation is seeded with an assistant turn that opens a Python
    code fence, and the request passes the fence marker as the ``stop``
    value (presumably so the reply ends where the code block would close).
    Any exception is caught and returned as an
    ``"Error fetching response: ..."`` string.
    """
    conversation = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "```python"},
    ]
    request = dict(
        messages=conversation,
        model="gemma-7b-it",
        stream=False,
        stop="```",
    )
    try:
        reply = client.chat.completions.create(**request)
        first_choice = reply.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"Error fetching response: {err}"

# File Parsing Functions
def parse_file(uploaded_file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    The format is chosen from the file name's extension, compared
    case-insensitively so names such as ``DATA.CSV`` or ``Report.XLSX`` are
    accepted as well.

    Args:
        uploaded_file: A file-like object exposing a ``name`` attribute.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` (after showing a
        Streamlit error) when the extension is neither ``.csv`` nor ``.xlsx``.
    """
    # Lower-case once so the extension checks ignore the name's casing.
    filename = uploaded_file.name.lower()
    if filename.endswith('.csv'):
        return pd.read_csv(uploaded_file)
    if filename.endswith('.xlsx'):
        return pd.read_excel(uploaded_file)
    st.error("Unsupported file type! Only CSV and Excel are supported.")
    return None

# Normalise column types before the frame is displayed
def preprocess_dataframe(df):
    """Cast every object/category column of ``df`` to ``str`` in place.

    The conversion exists to sidestep Arrow serialization problems when the
    frame is rendered. The same (mutated) frame is returned. If anything
    goes wrong, the error is shown via Streamlit and ``None`` is returned.
    """
    stringly_kinds = ("object", "category")
    try:
        for name in df.columns:
            if df[name].dtype.name in stringly_kinds:
                df[name] = df[name].astype(str)
    except Exception as err:
        st.error(f"Error preprocessing data: {err}")
        return None
    return df

# Analysis Function
def _render_chart(draw, x_label, y_label):
    """Draw one chart on a fresh 8x6 figure, show it, then close the figure.

    ``draw`` is called with the new axes. The figure is closed in a
    ``finally`` block so figures do not pile up in pyplot across Streamlit
    reruns, even when drawing raises.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        draw(ax)
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        st.pyplot(fig)
    finally:
        plt.close(fig)


def analyze_data(data, visualization_type):
    """Show the data's shape, render the chosen chart, and build a prompt.

    Args:
        data: The uploaded ``pandas.DataFrame``.
        visualization_type: One of ``"Bar Chart"``, ``"Line Graph"`` or
            ``"Area Chart"``. Any other value, or a frame without numeric
            data, produces a warning instead of a chart.

    Returns:
        The prompt string produced by ``generate_groq_prompt``.
    """
    st.subheader("Basic Analysis")
    st.write("Shape of Data:", data.shape)

    # Value axes need numeric columns; everything else is excluded here.
    numeric_data = data.select_dtypes(include=[np.number])
    has_numeric = not numeric_data.empty

    if visualization_type == "Bar Chart" and has_numeric:
        st.subheader("Bar Chart")
        x_col = st.selectbox("Select the X-axis column for the Bar Chart (Non-Numeric):", data.columns)
        # Restrict Y to numeric columns: the values are summed per group and
        # plotted, which fails for string columns.
        y_col = st.selectbox("Select the Y-axis column for the Bar Chart (Numeric):", numeric_data.columns)
        _render_chart(
            lambda ax: data.groupby(x_col)[y_col].sum().plot(kind='bar', ax=ax),
            x_col,
            y_col,
        )

    elif visualization_type == "Line Graph" and has_numeric:
        st.subheader("Line Graph")
        # Both selectors offer numeric columns, so the X label says so too.
        x_col = st.selectbox("Select the X-axis column for the Line Graph (Numeric):", numeric_data.columns)
        y_col = st.selectbox("Select the Y-axis column for the Line Graph (Numeric):", numeric_data.columns)
        _render_chart(lambda ax: ax.plot(data[x_col], data[y_col]), x_col, y_col)

    elif visualization_type == "Area Chart" and has_numeric:
        st.subheader("Area Chart")
        column = st.selectbox("Select a column for the Area Chart:", numeric_data.columns)
        _render_chart(lambda ax: data[column].plot(kind='area', ax=ax), column, "Area")

    else:
        st.warning("The database provided has no numerical data, so it isnt availble for visualisation. But you can chat with it")

    # Automatically generate a prompt for Groq based on the analysis
    return generate_groq_prompt(data, visualization_type)

# Build the code-generation prompt sent to Groq
def generate_groq_prompt(data, visualization_type):
    """Return the code-generation prompt for ``data`` and the chosen chart.

    The whole frame is rendered as text without its index and embedded in
    the prompt, followed by the selected visualization type and the request
    to generate self-contained Python code.
    """
    table_text = data.to_string(index=False)
    indent = "    "
    pieces = [
        "",
        indent + "Here is the summary statistics for the dataset:",
        indent + table_text,
        "",
        indent + f"The user has selected the '{visualization_type}' visualization type.",
        indent + "Please generate Python code that does this and for any data, please don't use any file input. Write the data in the code.",
        indent,
    ]
    return "\n".join(pieces)

# Streamlit App
# Flow: upload a file -> parse -> normalise column types -> show the data ->
# render the chosen chart -> optional chat / code generation through Groq.
st.title("Data Analysis AI")
st.markdown("Upload a file (CSV or Excel) to analyze it.")

uploaded_file = st.file_uploader("Choose a file", type=['csv', 'xlsx'])

if uploaded_file is not None:
    try:
        data = parse_file(uploaded_file)
        if data is not None:
            # Returns None (after reporting the error itself) on failure.
            data = preprocess_dataframe(data)  # Fix serialization issues

        # Re-check: either parsing or preprocessing may have produced None,
        # and both have already shown their own error message.
        if data is not None:
            st.subheader("Uploaded Data")
            st.write(data)  # Display the full dataset without truncation

            # Visualization Selection
            visualization_type = st.selectbox(
                "Select a visualization type:",
                ["Bar Chart", "Line Graph", "Area Chart"]
            )

            # Perform Analysis and Visualization
            prompt = analyze_data(data, visualization_type)

            # Chat with Groq Section
            st.subheader("Chat with Groq")
            chat_input = st.text_area("Ask Groq questions about the data:")
            if st.button("Chat"):
                if chat_input:
                    # Formatting the frame directly into the f-string uses
                    # its repr, which elides rows/columns of large frames;
                    # to_string() sends the complete table instead.
                    data_text = data.to_string()
                    chat_response = chat_with_groq(f"Here is the data:\n{data_text}\n\n{chat_input}")
                    st.write("Groq's Response:")
                    st.write(chat_response)

            # Groq Code Generation Section
            st.subheader("Generate Python Code with Groq")
            prompt_input = st.text_area("Describe the analysis or visualization you want to generate code for:")
            if st.button("Generate Code"):
                # The user's description is optional; without it the
                # auto-generated prompt is sent unchanged.
                if prompt_input:
                    prompt += f"\n\nUser request: {prompt_input}"
                response = generate_code_with_groq(prompt)

                # Display the Groq response
                st.subheader("Generated Code")
                st.code(response, language="python")
    except Exception as e:
        # Top-level boundary: show any unexpected failure in the UI.
        st.error(f"An error occurred: {e}")