dataAnalysis / app.py
zainulabedin949's picture
Update app.py
636fc8e verified
import os
import pandas as pd
import numpy as np
import streamlit as st
import requests
# Groq API setup.
# The API key is a secret and must not live in source control, so it is read
# from the GROQ_API_KEY environment variable (e.g. a deployment secret). An
# empty string means no key has been configured.
# NOTE(review): the key that used to be hard-coded here remains in the
# repository history and should be revoked.
API_KEY = os.environ.get("GROQ_API_KEY", "")
API_URL = "https://api.groq.com/openai/v1/chat/completions"  # Chat completions endpoint
_GAIN_COLUMN = "Gain (dB)"
_FREQUENCY_COLUMN = "Frequency (GHz)"
_GROQ_TIMEOUT_SECONDS = 30


def _clean_numeric_column(series):
    """Return *series* as a float32 array with NaN/inf replaced by the finite mean.

    Non-finite entries are excluded *before* the mean is computed so that a
    single infinite value cannot poison the fill value. If the column has no
    finite entry at all, 0 is used as the fill value.
    """
    values = series.astype("float32").to_numpy(copy=True)
    finite = np.isfinite(values)
    fill_value = values[finite].mean() if finite.any() else 0.0
    values[~finite] = fill_value
    return values


def _build_data_summary(gain_values, freq_values, mean_gain, median_gain, std_dev_gain):
    """Describe the uploaded dataset using statistics computed from its own values."""
    return (
        "The dataset contains simulation results for antennas.\n"
        f"- Number of samples: {gain_values.size}\n"
        f"- The frequency range is from {freq_values.min()} GHz "
        f"to {freq_values.max()} GHz.\n"
        f"- The antenna's gain ranges from {gain_values.min()} dB "
        f"to {gain_values.max()} dB.\n"
        f"- Mean gain: {mean_gain} dB, median gain: {median_gain} dB, "
        f"standard deviation of gain: {std_dev_gain} dB.\n"
    )


def _request_groq_analysis(data_summary):
    """Send *data_summary* to the Groq chat API; return the reply or an error message."""
    if not API_KEY:
        return "Error: Groq API key is not configured."
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "messages": [{"role": "user", "content": data_summary}],
        "model": "llama-3.3-70b-versatile",  # Ensure this model is supported by Groq
    }
    try:
        # A timeout keeps the app from hanging forever on a stalled connection.
        response = requests.post(
            API_URL, json=payload, headers=headers, timeout=_GROQ_TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        return f"Error: request to Groq failed: {exc}"
    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"
    return response.json()["choices"][0]["message"]["content"]


def analyze_file(uploaded_file):
    """Analyze antenna gain data from an uploaded CSV or Excel file.

    The file must contain the columns 'Gain (dB)' and 'Frequency (GHz)'
    (surrounding whitespace in the header names is ignored). Missing and
    infinite values are replaced by the mean of the column's finite values.

    Args:
        uploaded_file: File-like object with a ``name`` attribute, readable by
            ``pandas.read_csv`` / ``pandas.read_excel``.

    Returns:
        On success, a tuple ``(mean_gain, median_gain, std_dev_gain,
        groq_analysis)`` where ``groq_analysis`` is the model's reply, or an
        error message if the Groq request failed. On failure, a string
        describing the error.
    """
    try:
        # Extension check is case-insensitive so "DATA.CSV" is accepted too.
        file_name = uploaded_file.name.lower()
        if file_name.endswith('.csv'):
            df = pd.read_csv(uploaded_file)
        elif file_name.endswith('.xlsx'):
            df = pd.read_excel(uploaded_file)
        else:
            return "Error: The uploaded file is neither CSV nor Excel."

        # Clean up the column names by stripping any leading/trailing spaces.
        # This happens before any per-column handling so that headers such as
        # " Gain (dB) " are treated the same as "Gain (dB)".
        df.columns = df.columns.astype(str).str.strip()

        # Display the column names and first few rows for debugging
        st.write("Columns in the uploaded file:", df.columns)
        st.write("Preview of the uploaded data:", df.head())

        if _GAIN_COLUMN not in df.columns or _FREQUENCY_COLUMN not in df.columns:
            return "Error: Required columns 'Gain (dB)' or 'Frequency (GHz)' not found in the dataset."
        if df.empty:
            return "Error: The uploaded file contains no data rows."

        gain_values = _clean_numeric_column(df[_GAIN_COLUMN])
        freq_values = _clean_numeric_column(df[_FREQUENCY_COLUMN])

        # Basic descriptive statistics of the gain column
        mean_gain = np.mean(gain_values)
        median_gain = np.median(gain_values)
        std_dev_gain = np.std(gain_values)

        # The summary sent to Groq is built from the uploaded data itself.
        data_summary = _build_data_summary(
            gain_values, freq_values, mean_gain, median_gain, std_dev_gain
        )
        groq_analysis = _request_groq_analysis(data_summary)

        # Results are returned (not written here) so the caller renders them once.
        return mean_gain, median_gain, std_dev_gain, groq_analysis
    except Exception as e:
        # UI boundary: report any failure to the caller as a message instead of crashing.
        return f"An error occurred: {str(e)}"
# Streamlit interface: page title, instructions, file upload and result rendering.
st.title("Data Analysis")
# The uploader below accepts both Excel and CSV files, so the instructions say so.
st.write("Upload an Excel or CSV file to analyze the antenna data and get insights.")

# File upload
uploaded_file = st.file_uploader("Choose a file", type=["xlsx", "csv"])
if uploaded_file is not None:
    results = analyze_file(uploaded_file)
    if isinstance(results, tuple) and len(results) == 4:
        # A 4-tuple is a successful analysis: statistics plus the model's text.
        mean_gain, median_gain, std_dev_gain, groq_analysis = results
        st.write(f"Mean Gain: {mean_gain}")
        st.write(f"Median Gain: {median_gain}")
        st.write(f"Standard Deviation of Gain: {std_dev_gain}")
        st.write("Zain's Analysis:")
        st.write(groq_analysis)
    elif results is not None:
        # Anything else is an error message. None carries no message, and
        # writing it would render a literal "None" on the page.
        st.write(results)