Spaces:

EditsPaarth
/

AI-Data-Analysis

Sleeping

App Files Files Community

AI-Data-Analysis / app.py

EditsPaarth

Update app.py

8bdb5a8 verified about 1 year ago

raw

history blame contribute delete

6.37 kB

	import os
	import subprocess
	import tempfile

	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import seaborn as sns
	import streamlit as st
	from groq import Groq

	# Groq API key setup.
	# The key is read from the GROQ_API_KEY environment variable (for example a
	# hosting-platform secret) instead of being hard-coded, so the credential
	# never lives in the source file. Any key that was previously committed
	# here should be treated as leaked and revoked.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
	if not GROQ_API_KEY:
	    # Without a key every Groq request would fail; say so once and halt
	    # this script run instead of rendering a half-working page.
	    st.error("GROQ_API_KEY is not set. Configure it as an environment variable or secret.")
	    st.stop()
	client = Groq(api_key=GROQ_API_KEY)

	# Groq Chat Function.
	def chat_with_groq(prompt):
	    """Send ``prompt`` to the Groq chat model and return the reply text.

	    A fixed system message tells the model to do the processing itself
	    rather than answer with code.

	    Args:
	        prompt: Text of the user message.

	    Returns:
	        The content of the first completion choice, or a string starting
	        with "Error fetching response:" if the request raised an exception.
	    """
	    messages = [
	        {"role": "system", "content": "[INSTRUCTIONS DO NOT GENERATE CODE BUT DO THE PROCCESING YOURSELF]"},
	        {"role": "user", "content": prompt},
	    ]
	    try:
	        chat_completion = client.chat.completions.create(
	            messages=messages,
	            model="llama3-8b-8192",
	            stream=False,
	        )
	        # The prompt is intentionally not printed: callers embed the
	        # uploaded dataset in it, which should not end up in server logs.
	        return chat_completion.choices[0].message.content
	    except Exception as e:
	        # Best effort: the caller displays whatever string comes back.
	        return f"Error fetching response: {e}"

	def generate_code_with_groq(prompt):
	    """Ask the Groq code model for Python code answering ``prompt``.

	    The conversation ends with an assistant message that opens a Python
	    code fence, and generation stops at the next fence, so the returned
	    text is the fence's contents.

	    Args:
	        prompt: Text of the user message.

	    Returns:
	        The content of the first completion choice, or a string starting
	        with "Error fetching response:" if the request raised an exception.
	    """
	    conversation = [
	        {"role": "user", "content": prompt},
	        {"role": "assistant", "content": "```python"},
	    ]
	    request_options = {
	        "messages": conversation,
	        "model": "gemma-7b-it",
	        "stream": False,
	        "stop": "```",
	    }
	    try:
	        completion = client.chat.completions.create(**request_options)
	        first_choice = completion.choices[0]
	        return first_choice.message.content
	    except Exception as e:
	        return f"Error fetching response: {e}"

	# File Parsing Functions
	def parse_file(uploaded_file):
	    """Load an uploaded CSV or Excel file into a DataFrame.

	    The file type is chosen from the extension of ``uploaded_file.name``,
	    compared case-insensitively so names such as ``DATA.CSV`` are accepted.

	    Args:
	        uploaded_file: File-like object with a ``name`` attribute.

	    Returns:
	        The parsed DataFrame, or None (after showing an error in the UI)
	        when the extension is neither ``.csv`` nor ``.xlsx``.
	    """
	    filename = uploaded_file.name.lower()
	    if filename.endswith('.csv'):
	        return pd.read_csv(uploaded_file)
	    if filename.endswith('.xlsx'):
	        return pd.read_excel(uploaded_file)

	    st.error("Unsupported file type! Only CSV and Excel are supported.")
	    return None

	# Preprocess DataFrame to Fix Type Issues
	def preprocess_dataframe(df):
	    """Cast object and category columns of ``df`` to ``str``, in place.

	    The conversion exists to avoid Arrow serialization issues when the
	    frame is displayed.

	    Args:
	        df: DataFrame to normalise; it is modified and returned.

	    Returns:
	        The same DataFrame, or None (after showing an error in the UI) if
	        any step raised an exception.
	    """
	    text_like_dtypes = ('object', 'category')
	    try:
	        for column_name in df.columns:
	            if df[column_name].dtype.name in text_like_dtypes:
	                df[column_name] = df[column_name].astype(str)
	    except Exception as e:
	        st.error(f"Error preprocessing data: {e}")
	        return None
	    return df

	# Analysis Function
	def analyze_data(data, visualization_type):
	    """Show basic information and the selected chart, then build a prompt.

	    Args:
	        data: DataFrame to analyse.
	        visualization_type: One of "Bar Chart", "Line Graph" or
	            "Area Chart". Any other value, or a frame without numeric
	            data, produces a warning instead of a chart.

	    Returns:
	        The prompt string built by ``generate_groq_prompt`` for the same
	        data and visualization type.
	    """
	    st.subheader("Basic Analysis")
	    st.write("Shape of Data:", data.shape)

	    # Only numeric columns can be summed or plotted on a value axis
	    numeric_data = data.select_dtypes(include=[np.number])
	    has_numeric = not numeric_data.empty
	    fig = None

	    if visualization_type == "Bar Chart" and has_numeric:
	        st.subheader("Bar Chart")
	        x_col = st.selectbox("Select the X-axis column for the Bar Chart (Non-Numeric):", data.columns)
	        # The Y axis is summed per group, so offer numeric columns only
	        y_col = st.selectbox("Select the Y-axis column for the Bar Chart (Numeric):", numeric_data.columns)

	        fig, ax = plt.subplots(figsize=(8, 6))
	        data.groupby(x_col)[y_col].sum().plot(kind='bar', ax=ax)
	        ax.set_xlabel(x_col)
	        ax.set_ylabel(y_col)

	    elif visualization_type == "Line Graph" and has_numeric:
	        st.subheader("Line Graph")
	        # Both axes are chosen from the numeric columns; the label says so
	        x_col = st.selectbox("Select the X-axis column for the Line Graph (Numeric):", numeric_data.columns)
	        y_col = st.selectbox("Select the Y-axis column for the Line Graph (Numeric):", numeric_data.columns)

	        fig, ax = plt.subplots(figsize=(8, 6))
	        ax.plot(data[x_col], data[y_col])
	        ax.set_xlabel(x_col)
	        ax.set_ylabel(y_col)

	    elif visualization_type == "Area Chart" and has_numeric:
	        st.subheader("Area Chart")
	        column = st.selectbox("Select a column for the Area Chart:", numeric_data.columns)

	        fig, ax = plt.subplots(figsize=(8, 6))
	        data[column].plot(kind='area', ax=ax)
	        ax.set_xlabel(column)
	        ax.set_ylabel("Area")

	    else:
	        st.warning("The database provided has no numerical data, so it isnt availble for visualisation. But you can chat with it")

	    if fig is not None:
	        st.pyplot(fig)
	        # Close the figure once rendered so figures do not pile up in
	        # pyplot's registry across repeated script runs.
	        plt.close(fig)

	    # Automatically generate a prompt for Groq based on the analysis
	    return generate_groq_prompt(data, visualization_type)

	# Function to generate a prompt based on the data analysis
	def generate_groq_prompt(data, visualization_type):
	    """Build the code-generation prompt for ``data`` and the chosen chart.

	    Args:
	        data: DataFrame whose full contents are embedded in the prompt.
	        visualization_type: Name of the visualization the user selected.

	    Returns:
	        The prompt text: the dataset rendered without its index, the
	        selected visualization type, and the request for Python code that
	        inlines the data.
	    """
	    # Render every row as text, leaving out the index column
	    rendered_table = data.to_string(index=False)

	    return f"""
	Here is the summary statistics for the dataset:
	{rendered_table}

	The user has selected the '{visualization_type}' visualization type.
	Please generate Python code that does this and for any data, please don't use any file input. Write the data in the code.
	"""

	# Streamlit App
	# Page flow: upload a file, show it, draw the selected chart, then offer a
	# chat box and a code-generation box that both talk to Groq.
	st.title("Data Analysis AI")
	st.markdown("Upload a file (CSV or Excel) to analyze it.")

	uploaded_file = st.file_uploader("Choose a file", type=['csv', 'xlsx'])

	if uploaded_file is not None:
	    try:
	        data = parse_file(uploaded_file)
	        if data is not None:
	            data = preprocess_dataframe(data)  # Fix serialization issues

	        # Both helpers show their own error and return None on failure,
	        # so only continue when a usable frame came back.
	        if data is not None:
	            st.subheader("Uploaded Data")
	            st.write(data)  # Display the full dataset without truncation

	            # Visualization Selection
	            visualization_type = st.selectbox(
	                "Select a visualization type:",
	                ["Bar Chart", "Line Graph", "Area Chart"]
	            )

	            # Perform Analysis and Visualization
	            prompt = analyze_data(data, visualization_type)

	            # Chat with Groq Section
	            st.subheader("Chat with Groq")
	            chat_input = st.text_area("Ask Groq questions about the data:")
	            if st.button("Chat"):
	                if chat_input:
	                    # to_string() renders every row and column; the default
	                    # DataFrame repr elides large frames with "...".
	                    chat_response = chat_with_groq(
	                        f"Here is the data:\n{data.to_string()}\n\n{chat_input}"
	                    )
	                    st.write("Groq's Response:")
	                    st.write(chat_response)
	                else:
	                    st.warning("Please enter a question before clicking Chat.")

	            # Groq Code Generation Section
	            st.subheader("Generate Python Code with Groq")
	            prompt_input = st.text_area("Describe the analysis or visualization you want to generate code for:")
	            if st.button("Generate Code"):
	                if prompt_input:
	                    full_prompt = f"{prompt}\n\nUser request: {prompt_input}"
	                    response = generate_code_with_groq(full_prompt)

	                    # Display the Groq response
	                    st.subheader("Generated Code")
	                    st.code(response, language="python")
	                else:
	                    st.warning("Please describe what you want before clicking Generate Code.")
	    except Exception as e:
	        # Top-level boundary: surface any unexpected failure in the page
	        st.error(f"An error occurred: {e}")