Spaces:

zainulabedin949
/

dataAnalysis

Sleeping

File size: 4,583 Bytes

44ae8b8
 
 
 
 
 
 
 
 
 
 
 
d1c2247
44ae8b8
d1c2247
44ae8b8
d1c2247
44ae8b8
 
 
 
 
 
 
 
 
 
 
d1c2247
 
7d5a329
 
e66b2dd
9722cdb
 
 
 
f06d845
15d295f
 
 
 
 
 
d1c2247
9722cdb
 
 
44ae8b8
 
 
 
 
 
d1c2247
 
 
 
 
44ae8b8
 
 
 
 
 
 
 
d1c2247
44ae8b8
 
 
 
 
 
d1c2247
 
 
44ae8b8
635816f
44ae8b8
d1c2247
44ae8b8
 
 
635816f
44ae8b8
 
 
636fc8e
 
44ae8b8
 
 
 
 
 
 
 
d1c2247
44ae8b8
 
 
 
 
636fc8e
d1c2247
44ae8b8

import os
import pandas as pd
import numpy as np
import streamlit as st
import requests

# Groq API setup.
# SECURITY: the API key was previously hard-coded on this line. A credential
# that has been committed to a repository must be treated as compromised:
# revoke it with the provider and supply a fresh one through the GROQ_API_KEY
# environment variable (e.g. a deployment secret) instead of source code.
# An empty string means "not configured"; requests will then be rejected by
# the API rather than silently using a leaked key.
API_KEY = os.environ.get("GROQ_API_KEY", "")
# OpenAI-compatible chat-completions endpoint used for the analysis request.
API_URL = "https://api.groq.com/openai/v1/chat/completions"

# Column names the analysis requires (matched after whitespace stripping).
_GAIN_COLUMN = 'Gain (dB)'
_FREQ_COLUMN = 'Frequency (GHz)'
# Upper bound, in seconds, on how long the HTTP request may block the app.
_REQUEST_TIMEOUT_S = 30


def _load_dataframe(uploaded_file):
    """Read the upload into a DataFrame; return None for an unsupported extension."""
    # Compare case-insensitively so "DATA.CSV" is accepted like "data.csv".
    file_name = uploaded_file.name.lower()
    dtypes = {_GAIN_COLUMN: 'float32', _FREQ_COLUMN: 'float32'}
    if file_name.endswith('.csv'):
        return pd.read_csv(uploaded_file, dtype=dtypes)
    if file_name.endswith('.xlsx'):
        return pd.read_excel(uploaded_file, dtype=dtypes)
    return None


def _clean_numeric(series):
    """Return *series* as a float64 array with NaN/inf replaced by the finite mean."""
    values = np.array(series, dtype="float64")
    finite = np.isfinite(values)
    # The fill value is computed from finite entries only, so a stray inf can
    # no longer poison the mean; fall back to 0 when nothing usable exists.
    fill_value = values[finite].mean() if finite.any() else 0.0
    values[~finite] = fill_value
    return values


def _build_summary(gain_values, freq_values, mean_gain, median_gain, std_dev_gain):
    """Describe the uploaded data in plain text for the language-model prompt."""
    return (
        "The dataset contains simulation results for antennas. "
        f"It has {gain_values.size} samples covering a frequency range from "
        f"{freq_values.min():.3f} GHz to {freq_values.max():.3f} GHz.\n"
        f"- Gain ranges from {gain_values.min():.3f} dB to {gain_values.max():.3f} dB.\n"
        f"- Mean gain: {mean_gain:.3f} dB, median gain: {median_gain:.3f} dB, "
        f"standard deviation: {std_dev_gain:.3f} dB."
    )


def _request_groq_analysis(data_summary):
    """POST the summary to the chat-completions endpoint and return the reply text.

    On a non-200 response the returned string is an error description instead.
    """
    if not API_KEY:
        return "Error: no API key is configured."

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "messages": [{"role": "user", "content": data_summary}],
        "model": "llama-3.3-70b-versatile",  # Ensure this model is supported by Groq
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(
        API_URL, json=payload, headers=headers, timeout=_REQUEST_TIMEOUT_S
    )
    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"]
    return f"Error: {response.status_code}, {response.text}"


def analyze_file(uploaded_file):
    """Compute gain statistics for an uploaded CSV/XLSX file and request an analysis.

    Args:
        uploaded_file: File-like object with a ``name`` attribute, readable by
            ``pandas.read_csv`` / ``pandas.read_excel``.

    Returns:
        On success, a tuple ``(mean_gain, median_gain, std_dev_gain,
        groq_analysis)``; ``groq_analysis`` holds the model reply, or an
        ``"Error: ..."`` description when the API call did not succeed.
        On failure, a single error-message string.

    Side effects:
        Writes the column names and a data preview to the Streamlit page.
    """
    try:
        df = _load_dataframe(uploaded_file)
        if df is None:
            return "Error: The uploaded file is neither CSV nor Excel."

        # Clean up the column names by stripping any leading/trailing spaces
        df.columns = df.columns.str.strip()

        # Display the column names and first few rows for debugging
        st.write("Columns in the uploaded file:", df.columns)
        st.write("Preview of the uploaded data:", df.head())

        if _GAIN_COLUMN not in df.columns or _FREQ_COLUMN not in df.columns:
            return "Error: Required columns 'Gain (dB)' or 'Frequency (GHz)' not found in the dataset."
        if df.empty:
            return "Error: The uploaded file contains no data rows."

        gain_values = _clean_numeric(df[_GAIN_COLUMN])
        freq_values = _clean_numeric(df[_FREQ_COLUMN])

        mean_gain = float(np.mean(gain_values))
        median_gain = float(np.median(gain_values))
        std_dev_gain = float(np.std(gain_values))

        # The prompt is built from the measured values rather than fixed text,
        # so the model comments on the file that was actually uploaded.
        data_summary = _build_summary(
            gain_values, freq_values, mean_gain, median_gain, std_dev_gain
        )
        groq_analysis = _request_groq_analysis(data_summary)

        # Rendering of the results is left to the caller, which unpacks this tuple.
        return mean_gain, median_gain, std_dev_gain, groq_analysis

    except Exception as e:
        # UI boundary: report the problem as a message instead of crashing the app.
        return f"An error occurred: {str(e)}"


# Streamlit Interface: page title, upload widget, and result rendering.
st.title("Data Analysis")
st.write("Upload an Excel or CSV file to analyze the antenna data and get insights.")

# File upload (the widget accepts both spreadsheet formats)
uploaded_file = st.file_uploader("Choose a file", type=["xlsx", "csv"])

if uploaded_file is not None:
    results = analyze_file(uploaded_file)

    if isinstance(results, tuple):  # If it's a valid result (tuple)
        mean_gain, median_gain, std_dev_gain, groq_analysis = results

        st.write(f"Mean Gain: {mean_gain}")
        st.write(f"Median Gain: {median_gain}")
        st.write(f"Standard Deviation of Gain: {std_dev_gain}")

        st.write("Zain's Analysis:")
        st.write(groq_analysis)
    elif results is not None:
        # A string result is an error message. A None result carries nothing
        # to display, so it is skipped instead of rendering "None" on the page.
        st.write(results)