# Source: app.py - last change "Update app.py" (revision 8bdb5a8, verified).
import os
import subprocess
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from groq import Groq
# Groq API key setup.
# The key is read from the environment so the secret never lives in source
# control; export GROQ_API_KEY before launching the app.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked and replaced.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "it is required to talk to the Groq API."
    )
client = Groq(api_key=GROQ_API_KEY)
# Groq Chat Function.
def chat_with_groq(prompt):
    """Send *prompt* to the Groq chat model and return its text reply.

    A fixed system message is sent ahead of the user message; it instructs
    the model to do the processing itself instead of generating code.

    Args:
        prompt: Text of the user message (the question plus any inlined data).

    Returns:
        The content of the first completion choice, or a string beginning
        with "Error fetching response:" when the request raised an exception.
    """
    messages = [
        {"role": "system", "content": "[INSTRUCTIONS DO NOT GENERATE CODE BUT DO THE PROCCESING YOURSELF]"},
        {"role": "user", "content": prompt},
    ]
    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model="llama3-8b-8192",
            stream=False,
        )
        # The prompt is deliberately not echoed to stdout: callers inline the
        # uploaded dataset into it, which would end up in the server logs.
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Failures are returned as text so the caller can display them in
        # place of a reply.
        return f"Error fetching response: {e}"
def generate_code_with_groq(prompt):
    """Request a completion for *prompt* from the Groq code-generation model.

    The conversation sent to the API is the user prompt followed by an
    assistant message that contains only an opening ```python fence, and
    ``` is passed as the stop sequence.

    Args:
        prompt: Text of the user message.

    Returns:
        The content of the first completion choice, or a string beginning
        with "Error fetching response:" when the request raised an exception.
    """
    conversation = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "```python"},
    ]
    request_options = {
        "messages": conversation,
        "model": "gemma-7b-it",
        "stream": False,
        "stop": "```",
    }
    try:
        completion = client.chat.completions.create(**request_options)
        first_reply = completion.choices[0].message
        return first_reply.content
    except Exception as err:
        return f"Error fetching response: {err}"
# File Parsing Functions
def parse_file(uploaded_file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    The parser is chosen from the file name's extension. The comparison is
    case-insensitive so names such as ``REPORT.CSV`` or ``Data.Xlsx`` are
    accepted.

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute; it is
            passed directly to ``pd.read_csv`` / ``pd.read_excel``.

    Returns:
        The parsed DataFrame, or None (after showing a Streamlit error) when
        the extension is neither ``.csv`` nor ``.xlsx``.
    """
    filename = uploaded_file.name.lower()
    if filename.endswith('.csv'):
        return pd.read_csv(uploaded_file)
    if filename.endswith('.xlsx'):
        return pd.read_excel(uploaded_file)
    st.error("Unsupported file type! Only CSV and Excel are supported.")
    return None
# Preprocess DataFrame to Fix Type Issues
def preprocess_dataframe(df):
    """Cast every object- or category-typed column of *df* to ``str``.

    The frame is modified in place and the same object is returned. The
    cast is there to avoid Arrow serialization issues when the frame is
    displayed.

    Args:
        df: DataFrame to normalise.

    Returns:
        *df* itself after the casts, or None (after showing a Streamlit
        error) if any step raised an exception.
    """
    text_like_dtypes = ('object', 'category')
    try:
        for column_name in df.columns:
            dtype_name = df[column_name].dtype.name
            if dtype_name in text_like_dtypes:
                df[column_name] = df[column_name].astype(str)
    except Exception as exc:
        st.error(f"Error preprocessing data: {exc}")
        return None
    return df
# Analysis Function
def _render_figure(draw):
    """Create a figure, let *draw* paint on its axes, show it, then close it."""
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        draw(ax)
        st.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # figures would otherwise accumulate each time a chart is rendered.
        plt.close(fig)


def analyze_data(data, visualization_type):
    """Render the selected chart for *data* and build a code-generation prompt.

    Shows the data shape, then draws the chart named by
    *visualization_type* using columns the user picks from select boxes.
    If there is no numeric data (or the type is not one of the three known
    names) a warning is shown instead of a chart.

    Args:
        data: DataFrame to analyse.
        visualization_type: "Bar Chart", "Line Graph" or "Area Chart".

    Returns:
        The prompt string produced by ``generate_groq_prompt`` for the same
        data and visualization type.
    """
    st.subheader("Basic Analysis")
    st.write("Shape of Data:", data.shape)
    # Only numeric columns can be aggregated or plotted as values.
    numeric_data = data.select_dtypes(include=[np.number])
    has_numeric = not numeric_data.empty

    if visualization_type == "Bar Chart" and has_numeric:
        st.subheader("Bar Chart")
        x_col = st.selectbox("Select the X-axis column for the Bar Chart (Non-Numeric):", data.columns)
        # Offer numeric columns only: summing a text column gives nothing
        # that a bar chart can plot.
        y_col = st.selectbox("Select the Y-axis column for the Bar Chart (Numeric):", numeric_data.columns)

        def draw_bar(ax):
            data.groupby(x_col)[y_col].sum().plot(kind='bar', ax=ax)
            ax.set_xlabel(x_col)
            ax.set_ylabel(y_col)

        _render_figure(draw_bar)
    elif visualization_type == "Line Graph" and has_numeric:
        st.subheader("Line Graph")
        # NOTE(review): the X label says "Non-Numeric" but the options are the
        # numeric columns - confirm which of the two is intended.
        x_col = st.selectbox("Select the X-axis column for the Line Graph (Non-Numeric):", numeric_data.columns)
        y_col = st.selectbox("Select the Y-axis column for the Line Graph (Numeric):", numeric_data.columns)

        def draw_line(ax):
            ax.plot(data[x_col], data[y_col])
            ax.set_xlabel(x_col)
            ax.set_ylabel(y_col)

        _render_figure(draw_line)
    elif visualization_type == "Area Chart" and has_numeric:
        st.subheader("Area Chart")
        column = st.selectbox("Select a column for the Area Chart:", numeric_data.columns)

        def draw_area(ax):
            data[column].plot(kind='area', ax=ax)
            ax.set_xlabel(column)
            ax.set_ylabel("Area")

        _render_figure(draw_area)
    else:
        st.warning("The database provided has no numerical data, so it isnt availble for visualisation. But you can chat with it")

    # Automatically generate a prompt for Groq based on the analysis
    prompt = generate_groq_prompt(data, visualization_type)
    return prompt
# Function to generate a prompt based on the data analysis
def generate_groq_prompt(data, visualization_type):
    """Build the code-generation prompt for *data* and a chart type.

    The prompt embeds the whole frame rendered with
    ``data.to_string(index=False)`` followed by the selected visualization
    type and the instruction to write the data into the generated code.

    NOTE(review): the prompt text introduces the table as "summary
    statistics" although the full table is embedded - confirm the wording.

    Args:
        data: DataFrame whose contents are inlined into the prompt.
        visualization_type: Name of the chart type chosen by the user.

    Returns:
        The prompt as a single newline-separated string.
    """
    table_text = data.to_string(index=False)
    prompt_lines = [
        "",
        "Here is the summary statistics for the dataset:",
        table_text,
        f"The user has selected the '{visualization_type}' visualization type.",
        "Please generate Python code that does this and for any data, please don't use any file input. Write the data in the code.",
        "",
    ]
    return "\n".join(prompt_lines)
# Streamlit App
# Page flow: upload a file -> show it -> draw the selected chart -> offer a
# free-form chat about the data and a code-generation request.
st.title("Data Analysis AI")
st.markdown("Upload a file (CSV or Excel) to analyze it.")
uploaded_file = st.file_uploader("Choose a file", type=['csv', 'xlsx'])
if uploaded_file is not None:
    try:
        data = parse_file(uploaded_file)
        if data is not None:
            data = preprocess_dataframe(data)  # Fix serialization issues
        # parse_file and preprocess_dataframe both report their own error and
        # return None, so there is nothing left to render in that case.
        if data is not None:
            st.subheader("Uploaded Data")
            st.write(data)  # Display the full dataset without truncation
            # Visualization Selection
            visualization_type = st.selectbox(
                "Select a visualization type:",
                ["Bar Chart", "Line Graph", "Area Chart"]
            )
            # Perform Analysis and Visualization
            prompt = analyze_data(data, visualization_type)
            # Chat with Groq Section
            st.subheader("Chat with Groq")
            chat_input = st.text_area("Ask Groq questions about the data:")
            if st.button("Chat"):
                if chat_input:
                    # Formatting a DataFrame directly uses its repr, which
                    # elides rows/columns of large frames; to_string() sends
                    # the complete table, matching generate_groq_prompt.
                    data_text = data.to_string(index=False)
                    chat_response = chat_with_groq(f"Here is the data:\n{data_text}\n\n{chat_input}")
                    st.write("Groq's Response:")
                    st.write(chat_response)
                else:
                    st.warning("Please enter a question before clicking Chat.")
            # Groq Code Generation Section
            st.subheader("Generate Python Code with Groq")
            prompt_input = st.text_area("Describe the analysis or visualization you want to generate code for:")
            if st.button("Generate Code"):
                if prompt_input:
                    prompt += f"\n\nUser request: {prompt_input}"
                    response = generate_code_with_groq(prompt)
                    # Display the Groq response
                    st.subheader("Generated Code")
                    st.code(response, language="python")
                else:
                    st.warning("Please describe what to generate before clicking Generate Code.")
    except Exception as e:
        # Top-level boundary: surface any unexpected failure in the page.
        st.error(f"An error occurred: {e}")