"""Streamlit app that summarises antenna gain data and asks Groq for commentary.

The user uploads a CSV or Excel file containing the columns ``Gain (dB)`` and
``Frequency (GHz)``.  The app previews the data, computes basic statistics of
the gain column and sends a short, data-derived summary to the Groq chat
completions endpoint.
"""

import os

import numpy as np
import pandas as pd
import requests
import streamlit as st

# Groq API setup.
# SECURITY: an API key was previously hard-coded in this file.  Treat that key
# as compromised and rotate it; the key is now taken from the environment.
API_KEY = os.environ.get("GROQ_API_KEY", "")
API_URL = "https://api.groq.com/openai/v1/chat/completions"

GROQ_MODEL = "llama-3.3-70b-versatile"
REQUEST_TIMEOUT_S = 30  # never let a stalled connection hang the app

GAIN_COL = "Gain (dB)"
FREQ_COL = "Frequency (GHz)"


def _load_dataframe(uploaded_file):
    """Read *uploaded_file* into a DataFrame, or return None if unsupported.

    The extension check is case-insensitive.  Column names are stripped of
    surrounding whitespace so that padded headers still match.
    """
    filename = uploaded_file.name.lower()
    if filename.endswith(".csv"):
        df = pd.read_csv(uploaded_file)
    elif filename.endswith(".xlsx"):
        df = pd.read_excel(uploaded_file)
    else:
        return None
    df.columns = df.columns.astype(str).str.strip()
    return df


def _clean_numeric(series):
    """Return *series* as a float32 array with non-finite entries replaced.

    NaN and +/-inf are replaced by the mean of the finite values; if there are
    no finite values at all, they are replaced by 0.  Raises ``ValueError`` if
    the column holds values that cannot be converted to float.
    """
    values = series.astype("float32").to_numpy(copy=True)
    bad = ~np.isfinite(values)
    if bad.all():
        return np.zeros_like(values)
    # Mask infinities *before* taking the mean so they cannot poison the fill.
    values[bad] = np.nan
    return np.where(bad, np.nanmean(values), values)


def _build_summary(gain_values, freq_values, mean_gain, median_gain, std_dev_gain):
    """Build the prompt text for Groq from the statistics of the uploaded data."""
    return (
        "The dataset contains simulation results for antennas.\n"
        f"- Number of samples: {gain_values.size}\n"
        f"- The frequency range is from {freq_values.min():.3f} GHz "
        f"to {freq_values.max():.3f} GHz.\n"
        f"- The antenna's gain ranges from {gain_values.min():.3f} dB "
        f"to {gain_values.max():.3f} dB.\n"
        f"- Mean gain: {mean_gain:.3f} dB, median gain: {median_gain:.3f} dB, "
        f"standard deviation: {std_dev_gain:.3f} dB.\n"
        "Please analyse these results."
    )


def _query_groq(prompt):
    """Send *prompt* to Groq and return the reply text or an error message."""
    if not API_KEY:
        return "Error: Groq API key not configured (set the GROQ_API_KEY environment variable)."

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "messages": [{"role": "user", "content": prompt}],
        "model": GROQ_MODEL,
    }
    try:
        response = requests.post(
            API_URL, json=payload, headers=headers, timeout=REQUEST_TIMEOUT_S
        )
    except requests.RequestException as exc:
        return f"Error: request to Groq failed: {exc}"

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"
    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        return f"Error: unexpected response from Groq: {exc}"


def analyze_file(uploaded_file):
    """Analyse an uploaded antenna dataset.

    Writes the column names and a preview of the data to the Streamlit page,
    then computes gain statistics and requests a commentary from Groq.

    Args:
        uploaded_file: File-like object with a ``name`` attribute, as returned
            by ``st.file_uploader``.

    Returns:
        On success, a tuple ``(mean_gain, median_gain, std_dev_gain,
        groq_analysis)``.  ``groq_analysis`` is the model's reply, or an
        ``"Error: ..."`` message if only the Groq request failed, so that the
        statistics are still shown.  On any other failure, a single error
        message string.
    """
    # Top-level boundary for a UI: report any parsing problem to the user
    # instead of crashing the page.
    try:
        df = _load_dataframe(uploaded_file)
        if df is None:
            return "Error: The uploaded file is neither CSV nor Excel."

        # Display the column names and first few rows for debugging.
        st.write("Columns in the uploaded file:", df.columns)
        st.write("Preview of the uploaded data:", df.head())

        if GAIN_COL not in df.columns or FREQ_COL not in df.columns:
            return "Error: Required columns 'Gain (dB)' or 'Frequency (GHz)' not found in the dataset."
        if df.empty:
            return "Error: The uploaded dataset contains no rows."

        gain_values = _clean_numeric(df[GAIN_COL])
        freq_values = _clean_numeric(df[FREQ_COL])

        mean_gain = np.mean(gain_values)
        median_gain = np.median(gain_values)
        std_dev_gain = np.std(gain_values)
    except Exception as e:
        return f"An error occurred: {str(e)}"

    data_summary = _build_summary(
        gain_values, freq_values, mean_gain, median_gain, std_dev_gain
    )
    groq_analysis = _query_groq(data_summary)
    return mean_gain, median_gain, std_dev_gain, groq_analysis


# Streamlit Interface
st.title("Data Analysis")
st.write("Upload a Excel file to analyze the antenna data and get insights.")

# File upload
uploaded_file = st.file_uploader("Choose a file", type=["xlsx", "csv"])

if uploaded_file is not None:
    results = analyze_file(uploaded_file)

    if isinstance(results, tuple):
        # Valid result: statistics plus the Groq commentary (or its error text).
        mean_gain, median_gain, std_dev_gain, groq_analysis = results
        st.write(f"Mean Gain: {mean_gain}")
        st.write(f"Median Gain: {median_gain}")
        st.write(f"Standard Deviation of Gain: {std_dev_gain}")
        st.write("Groq's Analysis:")
        st.write(groq_analysis)
    else:
        st.write(results)  # Error message