Spaces:
Sleeping
Sleeping
import os
import subprocess
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from groq import Groq
# Groq API key setup.
# SECURITY: the key must never be hard-coded in source. Any key that has been
# committed or published should be treated as compromised and revoked.
# The key is read from the GROQ_API_KEY environment variable instead.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail early with a readable message instead of an opaque client error.
    st.error("The GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)
# Groq chat function.
def chat_with_groq(prompt: str) -> str:
    """Send *prompt* to the Groq chat model and return the reply text.

    A fixed system message tells the model not to generate code but to do
    the processing itself; *prompt* is sent as the user message.

    Args:
        prompt: Full user message (the caller embeds the dataset and the
            user's question in it).

    Returns:
        The content of the first completion choice. If the request raises,
        a string of the form ``"Error fetching response: <exception>"`` is
        returned instead of propagating the exception.
    """
    messages = [
        {
            "role": "system",
            "content": "[INSTRUCTIONS DO NOT GENERATE CODE BUT DO THE PROCCESING YOURSELF]",
        },
        {"role": "user", "content": prompt},
    ]
    try:
        # The prompt is deliberately not printed: it contains the uploaded
        # dataset, which should not end up in the server's stdout/logs.
        chat_completion = client.chat.completions.create(
            messages=messages,
            model="llama3-8b-8192",
            stream=False,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Best-effort: the UI displays whatever string comes back.
        return f"Error fetching response: {e}"
def generate_code_with_groq(prompt):
    """Ask the Groq code model to answer *prompt* and return the reply text.

    The conversation ends with an assistant turn that already contains an
    opening ```python fence, and the request stops at the next ``` sequence;
    presumably this makes the reply the bare code body (confirm against the
    Groq API behaviour).

    Returns the content of the first choice, or an
    "Error fetching response: ..." string if the request raises.
    """
    conversation = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "```python"},
    ]
    request_options = {
        "messages": conversation,
        "model": "gemma-7b-it",
        "stream": False,
        "stop": "```",
    }
    try:
        completion = client.chat.completions.create(**request_options)
        first_reply = completion.choices[0].message
        return first_reply.content
    except Exception as exc:
        return f"Error fetching response: {exc}"
# File parsing functions
def parse_file(uploaded_file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    Args:
        uploaded_file: File-like object with a ``name`` attribute (as passed
            from ``st.file_uploader`` in this script).

    Returns:
        A ``pandas.DataFrame`` for ``.csv`` / ``.xlsx`` files, or ``None``
        (after showing an error in the UI) for any other extension.
    """
    # Match the extension case-insensitively so "DATA.CSV" or "Report.XLSX"
    # are not rejected as unsupported.
    filename = uploaded_file.name.lower()
    if filename.endswith(".csv"):
        return pd.read_csv(uploaded_file)
    if filename.endswith(".xlsx"):
        return pd.read_excel(uploaded_file)
    st.error("Unsupported file type! Only CSV and Excel are supported.")
    return None
# Preprocess DataFrame to fix type issues
def preprocess_dataframe(df):
    """Cast object and category columns of *df* to ``str``, in place.

    The cast is meant to avoid Arrow serialization issues when the frame is
    displayed. Returns the same (mutated) DataFrame, or ``None`` after
    reporting the error in the UI if anything raises during the conversion.
    """
    text_like_dtypes = ("object", "category")
    try:
        for column in df.columns:
            if df[column].dtype.name in text_like_dtypes:
                df[column] = df[column].astype(str)
    except Exception as err:
        st.error(f"Error preprocessing data: {err}")
        return None
    return df
# Analysis functions
def _render_figure(draw, xlabel, ylabel):
    """Create an 8x6 figure, let *draw(ax)* plot on it, label, show and close it."""
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        draw(ax)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        st.pyplot(fig)
    finally:
        # pyplot keeps figures it created registered until they are closed;
        # without this every rerun of the script would leave one behind.
        plt.close(fig)


def analyze_data(data, visualization_type):
    """Show basic info and the selected chart, then build the code-gen prompt.

    Args:
        data: DataFrame to analyze.
        visualization_type: One of "Bar Chart", "Line Graph" or "Area Chart".

    Returns:
        The prompt string produced by ``generate_groq_prompt`` for this data
        and visualization type.
    """
    st.subheader("Basic Analysis")
    st.write("Shape of Data:", data.shape)

    # Charts are only offered when at least one numeric column exists.
    numeric_data = data.select_dtypes(include=[np.number])
    has_numeric = not numeric_data.empty

    if visualization_type == "Bar Chart" and has_numeric:
        st.subheader("Bar Chart")
        x_col = st.selectbox("Select the X-axis column for the Bar Chart (Non-Numeric):", data.columns)
        # Only numeric columns can be summed and plotted, so the Y choice is
        # restricted to them (offering every column made the default
        # selection fail on datasets whose first column is text).
        y_col = st.selectbox("Select the Y-axis column for the Bar Chart (Numeric):", numeric_data.columns)
        totals = data.groupby(x_col)[y_col].sum()
        _render_figure(lambda ax: totals.plot(kind='bar', ax=ax), x_col, y_col)
    elif visualization_type == "Line Graph" and has_numeric:
        st.subheader("Line Graph")
        # NOTE(review): the X label says "Non-Numeric" but only numeric
        # columns are offered; confirm which of the two is intended.
        x_col = st.selectbox("Select the X-axis column for the Line Graph (Non-Numeric):", numeric_data.columns)
        y_col = st.selectbox("Select the Y-axis column for the Line Graph (Numeric):", numeric_data.columns)
        _render_figure(lambda ax: ax.plot(data[x_col], data[y_col]), x_col, y_col)
    elif visualization_type == "Area Chart" and has_numeric:
        st.subheader("Area Chart")
        column = st.selectbox("Select a column for the Area Chart:", numeric_data.columns)
        _render_figure(lambda ax: data[column].plot(kind='area', ax=ax), column, "Area")
    else:
        st.warning("The database provided has no numerical data, so it isnt availble for visualisation. But you can chat with it")

    # Automatically generate a prompt for Groq based on the analysis
    return generate_groq_prompt(data, visualization_type)
# Build the code-generation prompt from the data and the chosen chart type
def generate_groq_prompt(data, visualization_type):
    """Return the prompt text sent to the code-generation model.

    The prompt embeds the whole DataFrame rendered as text without its
    index, names the selected visualization type, and asks for Python code
    that inlines the data instead of reading a file.
    """
    rendered_table = data.to_string(index=False)
    prompt_lines = (
        "",
        "Here is the summary statistics for the dataset:",
        rendered_table,
        f"The user has selected the '{visualization_type}' visualization type.",
        "Please generate Python code that does this and for any data, please don't use any file input. Write the data in the code.",
        "",
    )
    return "\n".join(prompt_lines)
# Streamlit app: upload a file, show it, chart it, and talk to Groq about it.
st.title("Data Analysis AI")
st.markdown("Upload a file (CSV or Excel) to analyze it.")
uploaded_file = st.file_uploader("Choose a file", type=['csv', 'xlsx'])

if uploaded_file is not None:
    try:
        data = parse_file(uploaded_file)
        if data is not None:
            data = preprocess_dataframe(data)  # Fix serialization issues

        # Both helpers report their own error in the UI and return None, so
        # there is nothing further to do here when either of them failed.
        if data is not None:
            st.subheader("Uploaded Data")
            st.write(data)  # Display the full dataset without truncation

            # Visualization selection
            visualization_type = st.selectbox(
                "Select a visualization type:",
                ["Bar Chart", "Line Graph", "Area Chart"]
            )

            # Perform analysis and visualization; returns the base prompt
            # used by the code-generation section below.
            prompt = analyze_data(data, visualization_type)

            # Chat with Groq section
            st.subheader("Chat with Groq")
            chat_input = st.text_area("Ask Groq questions about the data:")
            if st.button("Chat"):
                if chat_input:
                    # to_string() renders every row and column; formatting
                    # the frame directly elides the middle of larger frames,
                    # so the model would only see part of the data.
                    chat_response = chat_with_groq(
                        f"Here is the data:\n{data.to_string()}\n\n{chat_input}"
                    )
                    st.write("Groq's Response:")
                    st.write(chat_response)
                else:
                    st.warning("Please enter a question first.")

            # Groq code generation section
            st.subheader("Generate Python Code with Groq")
            prompt_input = st.text_area("Describe the analysis or visualization you want to generate code for:")
            if st.button("Generate Code"):
                if prompt_input:
                    code_prompt = f"{prompt}\n\nUser request: {prompt_input}"
                    response = generate_code_with_groq(code_prompt)
                    # Display the Groq response
                    st.subheader("Generated Code")
                    st.code(response, language="python")
                else:
                    st.warning("Please describe what code you want generated first.")
    except Exception as e:
        # Top-level boundary: surface any failure in the UI.
        st.error(f"An error occurred: {e}")