Spaces:
Sleeping
Sleeping
File size: 6,366 Bytes
b9123fb e83d0d8 b9123fb b3567b6 b9123fb 8bdb5a8 b9123fb 85c6abe 9e6e1b6 b9123fb e83d0d8 b9123fb 6720592 b9123fb 6720592 b9123fb e83d0d8 b9123fb ceb467e b9123fb 19661f8 b9123fb 5cf8a18 237c686 c1da72d b9123fb c1da72d b9123fb e83d0d8 b9123fb c1da72d b9123fb e83d0d8 b9123fb e83d0d8 b9123fb 3d4ecb5 b9123fb ceb467e b9123fb e83d0d8 ceb467e e83d0d8 b9123fb e83d0d8 39db6c7 b3567b6 e83d0d8 b9123fb 39db6c7 b9123fb b3567b6 b9123fb b3567b6 b9123fb e83d0d8 ceb467e 19661f8 e83d0d8 b9123fb 64f4108 6720592 b3567b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
import os
import subprocess
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from groq import Groq
# Groq API key setup.
# The key is read from the GROQ_API_KEY environment variable so that the
# credential is never stored in the source file. Any key that was previously
# committed in this file should be treated as leaked and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Halt the script run with a readable message instead of failing later
    # with an authentication error on the first API call.
    st.error("GROQ_API_KEY is not set. Configure it as an environment variable.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)
# Groq Chat Function.
def chat_with_groq(prompt):
    """Send ``prompt`` to the Groq chat model and return the reply text.

    A fixed system message is sent ahead of the user message; it tells the
    model to do the processing itself rather than answer with code.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The content of the first completion choice. If anything raises while
        requesting or reading the completion, a string beginning with
        ``"Error fetching response:"`` is returned instead of raising.
    """
    system_message = {
        "role": "system",
        "content": "[INSTRUCTIONS DO NOT GENERATE CODE BUT DO THE PROCCESING YOURSELF]",
    }
    try:
        chat_completion = client.chat.completions.create(
            messages=[system_message, {"role": "user", "content": prompt}],
            model="llama3-8b-8192",
            stream=False,
        )
        # The prompt is deliberately not printed: it carries the uploaded
        # dataset, which should not end up in the server's stdout/logs.
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Failures are reported as text because the caller displays the
        # return value directly.
        return f"Error fetching response: {e}"
def generate_code_with_groq(prompt):
    """Ask the Groq code model for Python code answering ``prompt``.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The content of the first completion choice, or a string beginning
        with ``"Error fetching response:"`` if the request or the response
        access raises.
    """
    # The conversation ends with an assistant turn that already contains an
    # opening code fence, and generation stops at the next fence; presumably
    # this makes the model reply with bare code only.
    conversation = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "```python"},
    ]
    request_options = {
        "messages": conversation,
        "model": "gemma-7b-it",
        "stream": False,
        "stop": "```",
    }
    try:
        completion = client.chat.completions.create(**request_options)
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        return f"Error fetching response: {e}"
# File Parsing Functions
def parse_file(uploaded_file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    The file type is chosen from the file name's extension. The comparison is
    case-insensitive, so ``DATA.CSV`` is handled the same way as ``data.csv``.

    Args:
        uploaded_file: File-like object with a ``name`` attribute, readable
            by ``pandas.read_csv`` / ``pandas.read_excel``.

    Returns:
        The parsed DataFrame, or ``None`` (after showing an error in the app)
        when the extension is neither ``.csv`` nor ``.xlsx``.
    """
    extension_source = uploaded_file.name.lower()
    if extension_source.endswith('.csv'):
        return pd.read_csv(uploaded_file)
    if extension_source.endswith('.xlsx'):
        return pd.read_excel(uploaded_file)
    st.error("Unsupported file type! Only CSV and Excel are supported.")
    return None
# Preprocess DataFrame to Fix Type Issues
def preprocess_dataframe(df):
    """Cast every object/category column of ``df`` to ``str``, in place.

    Args:
        df: DataFrame to normalise; its columns are overwritten directly.

    Returns:
        The same DataFrame object, or ``None`` (after showing an error in the
        app) if any step raises.
    """
    text_like_dtypes = ('object', 'category')
    try:
        # Uniform string columns avoid Arrow serialization issues.
        for column_name in df.columns:
            if df[column_name].dtype.name not in text_like_dtypes:
                continue
            df[column_name] = df[column_name].astype(str)
    except Exception as e:
        st.error(f"Error preprocessing data: {e}")
        return None
    return df
# Analysis Function
def _render_chart(draw, x_label, y_label):
    """Draw one chart on a fresh figure, show it in the app, then close it."""
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        draw(ax)
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; the script is
        # re-run on each interaction, so unclosed figures would pile up.
        plt.close(fig)


def analyze_data(data, visualization_type):
    """Show basic facts about ``data``, draw the chosen chart, build a prompt.

    Args:
        data: DataFrame to analyse.
        visualization_type: ``"Bar Chart"``, ``"Line Graph"`` or
            ``"Area Chart"``. Any other value, or data without numeric
            content, produces a warning instead of a chart.

    Returns:
        The prompt string produced by ``generate_groq_prompt`` for the same
        data and visualization type.
    """
    st.subheader("Basic Analysis")
    st.write("Shape of Data:", data.shape)

    # Every chart needs numeric values to plot.
    numeric_data = data.select_dtypes(include=[np.number])
    has_numeric = not numeric_data.empty

    if visualization_type == "Bar Chart" and has_numeric:
        st.subheader("Bar Chart")
        x_col = st.selectbox("Select the X-axis column for the Bar Chart (Non-Numeric):", data.columns)
        # Only numeric columns are offered for Y: summing a text column per
        # group does not yield plottable values.
        y_col = st.selectbox("Select the Y-axis column for the Bar Chart (Numeric):", numeric_data.columns)
        _render_chart(
            lambda ax: data.groupby(x_col)[y_col].sum().plot(kind='bar', ax=ax),
            x_col,
            y_col,
        )
    elif visualization_type == "Line Graph" and has_numeric:
        st.subheader("Line Graph")
        # Both axes are chosen from the numeric columns, so the X label now
        # says so (it previously claimed "Non-Numeric").
        x_col = st.selectbox("Select the X-axis column for the Line Graph (Numeric):", numeric_data.columns)
        y_col = st.selectbox("Select the Y-axis column for the Line Graph (Numeric):", numeric_data.columns)
        _render_chart(lambda ax: ax.plot(data[x_col], data[y_col]), x_col, y_col)
    elif visualization_type == "Area Chart" and has_numeric:
        st.subheader("Area Chart")
        column = st.selectbox("Select a column for the Area Chart:", numeric_data.columns)
        _render_chart(lambda ax: data[column].plot(kind='area', ax=ax), column, "Area")
    else:
        st.warning("The database provided has no numerical data, so it isnt availble for visualisation. But you can chat with it")

    # Build the code-generation prompt from the same data and chart choice.
    prompt = generate_groq_prompt(data, visualization_type)
    return prompt
# Function to generate a prompt based on the data analysis
def generate_groq_prompt(data, visualization_type):
    """Build the code-generation prompt for ``data`` and a chart type.

    Although the prompt text speaks of "summary statistics", what is embedded
    is the complete table rendered as text, without the index.

    Args:
        data: DataFrame whose rows are written into the prompt.
        visualization_type: Name of the chart type mentioned in the prompt.

    Returns:
        The prompt as a single string that begins and ends with a newline.
    """
    table_text = data.to_string(index=False)
    prompt_lines = [
        "",
        "Here is the summary statistics for the dataset:",
        table_text,
        f"The user has selected the '{visualization_type}' visualization type.",
        "Please generate Python code that does this and for any data, please don't use any file input. Write the data in the code.",
        "",
    ]
    return "\n".join(prompt_lines)
# Streamlit App
# Flow: upload a file -> parse and normalise it -> show it -> draw the chosen
# chart -> offer a chat box and a code-generation box backed by Groq.
st.title("Data Analysis AI")
st.markdown("Upload a file (CSV or Excel) to analyze it.")
uploaded_file = st.file_uploader("Choose a file", type=['csv', 'xlsx'])
if uploaded_file is not None:
    try:
        data = parse_file(uploaded_file)
        if data is not None:
            # Fix serialization issues. preprocess_dataframe reports its own
            # error and returns None on failure, hence the second check below.
            data = preprocess_dataframe(data)
        if data is not None:
            st.subheader("Uploaded Data")
            st.write(data)  # Display the full dataset without truncation

            # Visualization Selection
            visualization_type = st.selectbox(
                "Select a visualization type:",
                ["Bar Chart", "Line Graph", "Area Chart"]
            )

            # Perform Analysis and Visualization
            prompt = analyze_data(data, visualization_type)

            # Chat with Groq Section
            st.subheader("Chat with Groq")
            chat_input = st.text_area("Ask Groq questions about the data:")
            if st.button("Chat"):
                if chat_input:
                    # to_string() writes out every row; str(data) would elide
                    # the middle of large frames with "...". This matches how
                    # generate_groq_prompt embeds the data.
                    data_text = data.to_string(index=False)
                    chat_response = chat_with_groq(f"Here is the data:\n{data_text}\n\n{chat_input}")
                    st.write("Groq's Response:")
                    st.write(chat_response)
                else:
                    st.warning("Please enter a question first.")

            # Groq Code Generation Section
            st.subheader("Generate Python Code with Groq")
            prompt_input = st.text_area("Describe the analysis or visualization you want to generate code for:")
            if st.button("Generate Code"):
                if prompt_input:
                    full_prompt = f"{prompt}\n\nUser request: {prompt_input}"
                    response = generate_code_with_groq(full_prompt)
                    # Display the Groq response
                    st.subheader("Generated Code")
                    st.code(response, language="python")
                else:
                    st.warning("Please describe what code you want generated first.")
    except Exception as e:
        # Top-level boundary: show any unexpected failure in the app.
        st.error(f"An error occurred: {e}")
|