import os
import pandas as pd
import numpy as np
import streamlit as st
import requests

# Groq API setup.
# The API key is a secret and must not live in source control, so it is read
# from the GROQ_API_KEY environment variable. An empty string is used when the
# variable is unset; the Groq API will then reject the request and the error
# is reported to the user.
# NOTE(review): any key that was previously hard-coded here should be treated
# as leaked and revoked in the Groq console.
API_KEY = os.environ.get("GROQ_API_KEY", "")
API_URL = "https://api.groq.com/openai/v1/chat/completions"  # Chat-completions endpoint

def _load_dataframe(uploaded_file):
    """Read an uploaded CSV/XLSX file into a DataFrame; return None if unsupported."""
    # float32 keeps the two numeric columns compact (memory optimisation).
    column_types = {'Gain (dB)': 'float32', 'Frequency (GHz)': 'float32'}
    # Compare case-insensitively so names such as "DATA.CSV" are accepted too.
    file_name = uploaded_file.name.lower()
    if file_name.endswith('.csv'):
        return pd.read_csv(uploaded_file, dtype=column_types)
    if file_name.endswith('.xlsx'):
        return pd.read_excel(uploaded_file, dtype=column_types)
    return None


def _clean_numeric(series):
    """Return the column as a NumPy array with NaN and infinite entries replaced."""
    # Turn infinities into NaN *before* computing the mean; otherwise a single
    # +/-inf value makes the fill value itself infinite or NaN.
    finite_only = series.replace([np.inf, -np.inf], np.nan)
    # Assign the result instead of using a chained ``inplace=True`` call, which
    # does not reliably modify the DataFrame.
    filled = finite_only.fillna(finite_only.mean())
    # NaNs can only remain when the whole column was missing (mean is NaN);
    # fall back to 0 in that case.
    return np.nan_to_num(filled.to_numpy(), nan=0)


def _build_summary(freq_values, gain_values, mean_gain, median_gain, std_dev_gain):
    """Describe the cleaned data in plain text for the language model."""
    return (
        "The dataset contains simulation results for antennas.\n"
        f"- Number of samples: {gain_values.size}\n"
        f"- Frequency range: {freq_values.min():.3f} GHz to {freq_values.max():.3f} GHz\n"
        f"- Gain range: {gain_values.min():.3f} dB to {gain_values.max():.3f} dB\n"
        f"- Mean gain: {mean_gain:.3f} dB\n"
        f"- Median gain: {median_gain:.3f} dB\n"
        f"- Standard deviation of gain: {std_dev_gain:.3f} dB\n"
        "Provide a short analysis of the antenna's gain behaviour based on these figures."
    )


def _request_groq_analysis(summary):
    """Send the summary to the Groq chat API; return the reply or an error message."""
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "messages": [{"role": "user", "content": summary}],
        "model": "llama-3.3-70b-versatile"  # Ensure this model is supported by Groq
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.post(API_URL, json=payload, headers=headers, timeout=30)
    except requests.RequestException as exc:
        return f"Error: could not reach the Groq API ({exc})"

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError, ValueError) as exc:
        return f"Error: unexpected response from the Groq API ({exc!r})"


def analyze_file(uploaded_file):
    """Compute gain statistics for an uploaded antenna dataset and ask Groq for insights.

    Args:
        uploaded_file: File-like object with a ``name`` attribute ending in
            ``.csv`` or ``.xlsx``. The data must contain the columns
            'Gain (dB)' and 'Frequency (GHz)' (surrounding spaces in the
            header names are ignored).

    Returns:
        On success, the tuple ``(mean_gain, median_gain, std_dev_gain,
        groq_analysis)``. ``groq_analysis`` is the model's reply, or an
        "Error: ..." message if the API call failed, so the statistics are
        still available. On failure, a string describing the problem.

    Side effects:
        Writes the column names and a preview of the data to the Streamlit
        page.
    """
    try:
        df = _load_dataframe(uploaded_file)
        if df is None:
            return "Error: The uploaded file is neither CSV nor Excel."

        # Clean up the column names by stripping any leading/trailing spaces
        df.columns = df.columns.str.strip()

        # Display the column names and first few rows for debugging
        st.write("Columns in the uploaded file:", df.columns)
        st.write("Preview of the uploaded data:", df.head())

        if 'Gain (dB)' not in df.columns or 'Frequency (GHz)' not in df.columns:
            return "Error: Required columns 'Gain (dB)' or 'Frequency (GHz)' not found in the dataset."

        # Statistics over zero rows would only produce NaN and warnings.
        if df.empty:
            return "Error: The uploaded file contains no data rows."

        gain_values = _clean_numeric(df['Gain (dB)'])
        freq_values = _clean_numeric(df['Frequency (GHz)'])

        mean_gain = np.mean(gain_values)
        median_gain = np.median(gain_values)
        std_dev_gain = np.std(gain_values)

        # Describe the data that was actually uploaded rather than sending a
        # fixed, unrelated text to the model.
        data_summary = _build_summary(
            freq_values, gain_values, mean_gain, median_gain, std_dev_gain
        )
        groq_analysis = _request_groq_analysis(data_summary)

        # The caller renders these values; returning them (instead of writing
        # them here) avoids showing the results twice.
        return mean_gain, median_gain, std_dev_gain, groq_analysis

    except Exception as e:
        # UI boundary: report any unexpected failure as a message instead of
        # crashing the page.
        return f"An error occurred: {str(e)}"


# Streamlit Interface
st.title("Data Analysis")
# The uploader below accepts both formats, so the instructions mention both.
st.write("Upload an Excel or CSV file to analyze the antenna data and get insights.")

# File upload
uploaded_file = st.file_uploader("Choose a file", type=["xlsx", "csv"])

if uploaded_file is not None:
    results = analyze_file(uploaded_file)

    if isinstance(results, tuple):
        # A tuple carries the computed statistics and the model's analysis.
        mean_gain, median_gain, std_dev_gain, groq_analysis = results

        st.write(f"Mean Gain: {mean_gain}")
        st.write(f"Median Gain: {median_gain}")
        st.write(f"Standard Deviation of Gain: {std_dev_gain}")

        st.write("Zain's Analysis:")
        st.write(groq_analysis)
    elif results is not None:
        # Any other non-empty result is an error message to show the user.
        st.write(results)
    # A None result means there is nothing more to display; writing it would
    # render a literal "None" on the page.