Spaces:

EditsPaarth
/

AI-Data-Analysis

Sleeping

App Files Files Community

EditsPaarth committed on Nov 17, 2024

Commit

b9123fb

verified ·

1 Parent(s): 8215549

app.py

Browse files

Files changed (1) hide show

app.py +210 -0

app.py ADDED Viewed

	@@ -0,0 +1,210 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import tempfile
+import subprocess
+from groq import Groq
+# Groq API Key setup
+GROQ_API_KEY = "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"
+client = Groq(api_key=GROQ_API_KEY)
+# Groq Chat Function
+def chat_with_groq(prompt):
+    try:
+        chat_completion = client.chat.completions.create(
+            messages=[{"role": "user", "content": prompt}],
+            model="gemma-7b-it",
+            stream=False
+        )
+        print(prompt)
+        return chat_completion.choices[0].message.content
+    except Exception as e:
+        return f"Error fetching response: {e}"
+def generate_code_with_groq(prompt):
+    try:
+        chat_completion = client.chat.completions.create(
+            messages=[{"role": "user", "content": prompt}, {"role": "assistant", "content": "```python"}],
+            model="gemma-7b-it",
+            stream=False,
+            stop="```"
+        )
+        return chat_completion.choices[0].message.content
+    except Exception as e:
+        return f"Error fetching response: {e}"
+# File Parsing Functions
+def parse_file(uploaded_file):
+    filename = uploaded_file.name
+    if filename.endswith('.csv'):
+        return pd.read_csv(uploaded_file)
+    elif filename.endswith('.xlsx'):
+        return pd.read_excel(uploaded_file)
+    else:
+        st.error("Unsupported file type! Only CSV and Excel are supported.")
+        return None
+# Preprocess DataFrame to Fix Type Issues
+def preprocess_dataframe(df):
+    try:
+        # Convert problematic columns to string to avoid Arrow serialization issues
+        for col in df.columns:
+            if df[col].dtype.name == 'object' or df[col].dtype.name == 'category':
+                df[col] = df[col].astype(str)
+        return df
+    except Exception as e:
+        st.error(f"Error preprocessing data: {e}")
+        return None
+# Analysis Function
+def analyze_data(data, visualization_type, class_size=10):
+    st.subheader("Basic Analysis")
+    st.write("Shape of Data:", data.shape)
+    st.write("Data Types:")
+    st.write(data.dtypes)
+    # Combine numerical and non-numerical summaries
+    st.write("Summary Statistics:")
+    combined_stats = pd.concat(
+        [
+            data.describe(include=[np.number]),
+            data.describe(include=['object', 'category'])
+        ],
+        axis=1
+    )
+    st.write(combined_stats)
+    numeric_data = data.select_dtypes(include=[np.number])
+    # Visualization logic
+    if visualization_type == "Heatmap" and not numeric_data.empty:
+        st.subheader("Correlation Heatmap")
+        fig, ax = plt.subplots(figsize=(8, 6))
+        sns.heatmap(numeric_data.corr(), annot=True, ax=ax, cmap="coolwarm", fmt=".2f")
+        st.pyplot(fig)
+    elif visualization_type == "Bar Chart" and not numeric_data.empty:
+        st.subheader("Bar Chart")
+        x_col = st.selectbox("Select the X-axis column for the Bar Chart:", data.columns)
+        y_col = st.selectbox("Select the Y-axis column for the Bar Chart:", data.columns)
+        fig, ax = plt.subplots(figsize=(8, 6))
+        data.groupby(x_col)[y_col].sum().plot(kind='bar', ax=ax)
+        ax.set_xlabel(x_col)
+        ax.set_ylabel(y_col)
+        st.pyplot(fig)
+    elif visualization_type == "Line Graph" and not numeric_data.empty:
+        st.subheader("Line Graph")
+        x_col = st.selectbox("Select the X-axis column for the Line Graph:", numeric_data.columns)
+        y_col = st.selectbox("Select the Y-axis column for the Line Graph:", numeric_data.columns)
+        fig, ax = plt.subplots(figsize=(8, 6))
+        ax.plot(data[x_col], data[y_col])
+        ax.set_xlabel(x_col)
+        ax.set_ylabel(y_col)
+        st.pyplot(fig)
+    elif visualization_type == "Scatter Plot" and not numeric_data.empty:
+        st.subheader("Scatter Plot")
+        x_col = st.selectbox("Select the X-axis column for the Scatter Plot:", numeric_data.columns)
+        y_col = st.selectbox("Select the Y-axis column for the Scatter Plot:", numeric_data.columns)
+        fig, ax = plt.subplots(figsize=(8, 6))
+        ax.scatter(data[x_col], data[y_col])
+        ax.set_xlabel(x_col)
+        ax.set_ylabel(y_col)
+        st.pyplot(fig)
+    elif visualization_type == "Histogram" and not numeric_data.empty:
+        st.subheader("Histogram")
+        column = st.selectbox("Select a column for the Histogram:", numeric_data.columns)
+        fig, ax = plt.subplots(figsize=(8, 6))
+        data[column].plot(kind='hist', bins=class_size, ax=ax)
+        ax.set_xlabel(column)
+        ax.set_ylabel("Frequency")
+        st.pyplot(fig)
+    elif visualization_type == "Area Chart" and not numeric_data.empty:
+        st.subheader("Area Chart")
+        column = st.selectbox("Select a column for the Area Chart:", numeric_data.columns)
+        fig, ax = plt.subplots(figsize=(8, 6))
+        data[column].plot(kind='area', ax=ax)
+        ax.set_xlabel(column)
+        ax.set_ylabel("Area")
+        st.pyplot(fig)
+    else:
+        st.warning("No valid visualization option selected or data available.")
+    # Automatically generate a prompt for Groq based on the analysis
+    prompt = generate_groq_prompt(data, visualization_type, class_size)
+    return prompt
+# Function to generate a prompt based on the data analysis
+def generate_groq_prompt(data, visualization_type, class_size):
+    # Convert DataFrame to a string without the index
+    data_without_index = data.to_string(index=False)
+    prompt = f"""
+    Here is the summary statistics for the dataset:
+    {data_without_index}
+    The user has selected the '{visualization_type}' visualization type with a class size of {class_size}.
+    Please generate Python code that does this and for any data, please don't use any file input. Write the data in the code.
+    """
+    return prompt
+# Streamlit App
+st.title("Data Analysis AI")
+st.markdown("Upload a file (CSV or Excel) to analyze it.")
+uploaded_file = st.file_uploader("Choose a file", type=['csv', 'xlsx'])
+if uploaded_file is not None:
+    try:
+        data = parse_file(uploaded_file)
+        if data is not None:
+            data = preprocess_dataframe(data)  # Fix serialization issues
+            st.subheader("Uploaded Data")
+            st.write(data.head())
+            # Visualization Selection
+            visualization_type = st.selectbox(
+                "Select a visualization type:",
+                ["Heatmap", "Bar Chart", "Line Graph", "Scatter Plot", "Histogram", "Area Chart"]
+            )
+            # User input for class size customization
+            class_size = st.slider("Select the class size for certain plots (e.g., Histogram)", 5, 50, 10)
+            # Perform Analysis and Visualization
+            prompt = analyze_data(data, visualization_type, class_size)
+            st.text(f"Prompt sent to Groq:\n{prompt}")
+            # Chat with Groq Section
+            st.subheader("Chat with Groq")
+            chat_input = st.text_area("Ask Groq questions about the data:")
+            if st.button("Chat"):
+                if chat_input:
+                    chat_response = chat_with_groq(f"Here is the data:\n{data}\n\n{chat_input}")
+                    st.write("Groq's Response:")
+                    st.write(chat_response)
+            # Groq Code Generation Section
+            st.subheader("Generate Python Code with Groq")
+            prompt_input = st.text_area("Describe the analysis or visualization you want to generate code for:")
+            if st.button("Generate Code"):
+                if prompt_input:
+                    prompt += f"\n\nUser request: {prompt_input}"
+                response = generate_code_with_groq(prompt)
+                # Display the Groq response
+                st.subheader("Generated Code")
+                st.code(response, language="python")
+    except Exception as e:
+        st.error(f"An error occurred: {e}")